diff options
Diffstat (limited to 'openmp/runtime/src/kmp_affinity.cpp')
-rw-r--r-- | openmp/runtime/src/kmp_affinity.cpp | 405 |
1 files changed, 268 insertions, 137 deletions
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 082ec9d6c73..d6821e0440f 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -1,7 +1,7 @@ /* * kmp_affinity.cpp -- affinity management - * $Revision: 42810 $ - * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ + * $Revision: 43473 $ + * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $ */ @@ -19,7 +19,7 @@ #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_str.h" - +#include "kmp_wrapper_getpid.h" #if KMP_AFFINITY_SUPPORTED @@ -49,7 +49,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) return buf; } - sprintf(scan, "{%ld", i); + sprintf(scan, "{%ld", (long)i); while (*scan != '\0') scan++; i++; for (; i < KMP_CPU_SETSIZE; i++) { @@ -66,7 +66,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) if (end - scan < 15) { break; } - sprintf(scan, ",%-ld", i); + sprintf(scan, ",%-ld", (long)i); while (*scan != '\0') scan++; } if (i < KMP_CPU_SETSIZE) { @@ -89,7 +89,6 @@ __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) if (__kmp_num_proc_groups > 1) { int group; - struct GROUP_AFFINITY ga; KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL); for (group = 0; group < __kmp_num_proc_groups; group++) { int i; @@ -315,6 +314,106 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) return 0; } +/** A structure for holding machine-specific hierarchy info to be computed once at init. */ +class hierarchy_info { +public: + /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine, + etc. We don't want to get specific with nomenclature */ + static const kmp_uint32 maxLevels=7; + + /** This is specifically the depth of the machine configuration hierarchy, in terms of the + number of levels along the longest path from root to any leaf. It corresponds to the + number of entries in numPerLevel if we exclude all but one trailing 1. */ + kmp_uint32 depth; + kmp_uint32 base_depth; + kmp_uint32 base_num_threads; + bool uninitialized; + + /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a + node at level i has. For example, if we have a machine with 4 packages, 4 cores/package + and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ + kmp_uint32 numPerLevel[maxLevels]; + kmp_uint32 skipPerLevel[maxLevels]; + + void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { + int hier_depth = adr2os[0].first.depth; + int level = 0; + for (int i=hier_depth-1; i>=0; --i) { + int max = -1; + for (int j=0; j<num_addrs; ++j) { + int next = adr2os[j].first.childNums[i]; + if (next > max) max = next; + } + numPerLevel[level] = max+1; + ++level; + } + } + + hierarchy_info() : depth(1), uninitialized(true) {} + void init(AddrUnsPair *adr2os, int num_addrs) + { + uninitialized = false; + for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + skipPerLevel[i] = 1; + } + + // Sort table by physical ID + if (adr2os) { + qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels); + deriveLevels(adr2os, num_addrs); + } + else { + numPerLevel[0] = 4; + numPerLevel[1] = num_addrs/4; + if (num_addrs%4) numPerLevel[1]++; + } + + base_num_threads = num_addrs; + for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth + if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' + depth++; + + kmp_uint32 branch = 4; + if (numPerLevel[0] == 1) branch = num_addrs/4; + if (branch<4) branch=4; + for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width + while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0! + if (numPerLevel[d] & 1) numPerLevel[d]++; + numPerLevel[d] = numPerLevel[d] >> 1; + if (numPerLevel[d+1] == 1) depth++; + numPerLevel[d+1] = numPerLevel[d+1] << 1; + } + if(numPerLevel[0] == 1) { + branch = branch >> 1; + if (branch<4) branch = 4; + } + } + + for (kmp_uint32 i=1; i<depth; ++i) + skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]; + + base_depth = depth; + } +}; + +static hierarchy_info machine_hierarchy; + +void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { + if (machine_hierarchy.uninitialized) + machine_hierarchy.init(NULL, nproc); + + if (nproc <= machine_hierarchy.base_num_threads) + machine_hierarchy.depth = machine_hierarchy.base_depth; + KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0); + while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) { + machine_hierarchy.depth++; + machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2]; + } + thr_bar->depth = machine_hierarchy.depth; + thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1; + thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; +} // // When sorting by labels, __kmp_affinity_assign_child_nums() must first be @@ -1963,7 +2062,7 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, // A newline has signalled the end of the processor record. // Check that there aren't too many procs specified. // - if (num_avail == __kmp_xproc) { + if ((int)num_avail == __kmp_xproc) { CLEANUP_THREAD_INFO; *msg_id = kmp_i18n_str_TooManyEntries; return -1; @@ -2587,7 +2686,7 @@ static int nextNewMask; #define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \ { \ if (((_osId) > _maxOsId) || \ - (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) {\ + (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ if (__kmp_affinity_verbose || (__kmp_affinity_warnings \ && (__kmp_affinity_type != affinity_none))) { \ KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ @@ -3045,14 +3144,15 @@ __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, (*setSize)++; } *scan = next; // skip num - } + } else { KMP_ASSERT2(0, "bad explicit places list"); } } -static void +//static void +void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, unsigned int *out_numMasks, const char *placelist, kmp_affin_mask_t *osId2Mask, int maxOsId) @@ -3109,71 +3209,41 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, // valid follow sets are ',' ':' and EOL // SKIP_WS(scan); + int stride; if (*scan == '\0' || *scan == ',') { - int i; - for (i = 0; i < count; i++) { - int j; - if (setSize == 0) { - break; - } - ADD_MASK(tempMask); - setSize = 0; - for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) { - // - // Use a temp var in case macro is changed to evaluate - // args multiple times. - // - if (KMP_CPU_ISSET(j - 1, tempMask)) { - KMP_CPU_SET(j, tempMask); - setSize++; - } - else { - KMP_CPU_CLR(j, tempMask); - } + stride = +1; + } + else { + KMP_ASSERT2(*scan == ':', "bad explicit places list"); + scan++; // skip ':' + + // + // Read stride parameter + // + int sign = +1; + for (;;) { + SKIP_WS(scan); + if (*scan == '+') { + scan++; // skip '+' + continue; } - for (; j >= 0; j--) { - KMP_CPU_CLR(j, tempMask); + if (*scan == '-') { + sign *= -1; + scan++; // skip '-' + continue; } - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - - if (*scan == '\0') { break; } - scan++; // skip ',' - continue; - } - - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; + KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), + "bad explicit places list"); + next = scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_DEBUG_ASSERT(stride >= 0); + scan = next; + stride *= sign; } - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), - "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - int stride = __kmp_str_to_int(scan, *next); - KMP_DEBUG_ASSERT(stride >= 0); - scan = next; - stride *= sign; if (stride > 0) { int i; @@ -3185,13 +3255,21 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, ADD_MASK(tempMask); setSize = 0; for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) { - if (KMP_CPU_ISSET(j - stride, tempMask)) { - KMP_CPU_SET(j, tempMask); - setSize++; + if (! KMP_CPU_ISSET(j - stride, tempMask)) { + KMP_CPU_CLR(j, tempMask); } - else { + else if ((j > maxOsId) || + (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, j); + } KMP_CPU_CLR(j, tempMask); } + else { + KMP_CPU_SET(j, tempMask); + setSize++; + } } for (; j >= 0; j--) { KMP_CPU_CLR(j, tempMask); @@ -3201,23 +3279,31 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, else { int i; for (i = 0; i < count; i++) { - unsigned j; + int j; if (setSize == 0) { break; } ADD_MASK(tempMask); setSize = 0; - for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride; + for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride; j++) { - if (KMP_CPU_ISSET(j - stride, tempMask)) { - KMP_CPU_SET(j, tempMask); - setSize++; + if (! KMP_CPU_ISSET(j - stride, tempMask)) { + KMP_CPU_CLR(j, tempMask); } - else { + else if ((j > maxOsId) || + (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) { + if (__kmp_affinity_verbose || (__kmp_affinity_warnings + && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, j); + } KMP_CPU_CLR(j, tempMask); } + else { + KMP_CPU_SET(j, tempMask); + setSize++; + } } - for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) { + for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) { KMP_CPU_CLR(j, tempMask); } } @@ -3270,9 +3356,13 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) } __kmp_place_num_cores = nCoresPerPkg; // use all available cores } - if ( !__kmp_affinity_uniform_topology() || depth != 3 ) { - KMP_WARNING( AffThrPlaceUnsupported ); - return; // don't support non-uniform topology or not-3-level architecture + if ( !__kmp_affinity_uniform_topology() ) { + KMP_WARNING( AffThrPlaceNonUniform ); + return; // don't support non-uniform topology + } + if ( depth != 3 ) { + KMP_WARNING( AffThrPlaceNonThreeLevel ); + return; // don't support not-3-level topology } if ( __kmp_place_num_threads_per_core == 0 ) { __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts @@ -3400,18 +3490,14 @@ __kmp_aux_affinity_initialize(void) } if (depth < 0) { - if ((msg_id != kmp_i18n_null) - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { -# if KMP_MIC - if (__kmp_affinity_verbose) { + if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), KMP_I18N_STR(DecodingLegacyAPIC)); } -# else - KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), - KMP_I18N_STR(DecodingLegacyAPIC)); -# endif + else { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); + } } file_name = NULL; @@ -3428,19 +3514,13 @@ __kmp_aux_affinity_initialize(void) # if KMP_OS_LINUX if (depth < 0) { - if ((msg_id != kmp_i18n_null) - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { -# if KMP_MIC - if (__kmp_affinity_verbose) { + if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); } -# else - KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); -# endif - } - else if (__kmp_affinity_verbose) { - KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); + else { + KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); + } } FILE *f = fopen("/proc/cpuinfo", "r"); @@ -3461,20 +3541,32 @@ __kmp_aux_affinity_initialize(void) # endif /* KMP_OS_LINUX */ +# if KMP_OS_WINDOWS && KMP_ARCH_X86_64 + + if ((depth < 0) && (__kmp_num_proc_groups > 1)) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); + } + + depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); + KMP_ASSERT(depth != 0); + } + +# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */ + if (depth < 0) { - if (msg_id != kmp_i18n_null - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { + if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { if (file_name == NULL) { - KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id)); + KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id)); } else if (line == 0) { - KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); + KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); } else { - KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id)); + KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id)); } } + // FIXME - print msg if msg_id = kmp_i18n_null ??? file_name = ""; depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); @@ -3508,7 +3600,6 @@ __kmp_aux_affinity_initialize(void) KMP_ASSERT(address2os == NULL); return; } - if (depth < 0) { KMP_ASSERT(msg_id != kmp_i18n_null); KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); @@ -3526,7 +3617,6 @@ __kmp_aux_affinity_initialize(void) KMP_ASSERT(address2os == NULL); return; } - if (depth < 0) { KMP_ASSERT(msg_id != kmp_i18n_null); KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); @@ -3597,23 +3687,9 @@ __kmp_aux_affinity_initialize(void) depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); KMP_ASSERT(depth != 0); - if (depth < 0) { - if ((msg_id != kmp_i18n_null) - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id)); - } - - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); - if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; - } - // should not fail - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); } } @@ -3658,7 +3734,7 @@ __kmp_aux_affinity_initialize(void) kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc); if (__kmp_affinity_gran_levels == 0) { - KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc); + KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); } // @@ -3852,6 +3928,7 @@ __kmp_aux_affinity_initialize(void) } __kmp_free(osId2Mask); + machine_hierarchy.init(address2os, __kmp_avail_proc); } @@ -3953,7 +4030,7 @@ __kmp_affinity_set_init_mask(int gtid, int isa_root) } # endif KMP_ASSERT(fullMask != NULL); - i = -1; + i = KMP_PLACE_ALL; mask = fullMask; } else { @@ -4020,7 +4097,8 @@ __kmp_affinity_set_init_mask(int gtid, int isa_root) char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid, + buf); } # if KMP_OS_WINDOWS @@ -4058,14 +4136,14 @@ __kmp_affinity_set_place(int gtid) // Check that the new place is within this thread's partition. // KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - KMP_DEBUG_ASSERT(th->th.th_new_place >= 0); - KMP_DEBUG_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); + KMP_ASSERT(th->th.th_new_place >= 0); + KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); if (th->th.th_first_place <= th->th.th_last_place) { - KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place) + KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)); } else { - KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place) + KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place)); } @@ -4082,7 +4160,8 @@ __kmp_affinity_set_place(int gtid) char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf); + KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), + gtid, buf); } __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); } @@ -4153,6 +4232,11 @@ __kmp_aux_set_affinity(void **mask) th->th.th_new_place = KMP_PLACE_UNDEFINED; th->th.th_first_place = 0; th->th.th_last_place = __kmp_affinity_num_masks - 1; + + // + // Turn off 4.0 affinity for the current tread at this parallel level. + // + th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; # endif return retval; @@ -4207,7 +4291,6 @@ __kmp_aux_get_affinity(void **mask) } - int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) { @@ -4360,7 +4443,8 @@ void __kmp_balanced_affinity( int tid, int nthreads ) if (__kmp_affinity_verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + tid, buf); } __kmp_set_system_affinity( mask, TRUE ); } else { // Non-uniform topology @@ -4535,7 +4619,8 @@ void __kmp_balanced_affinity( int tid, int nthreads ) if (__kmp_affinity_verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + tid, buf); } __kmp_set_system_affinity( mask, TRUE ); } @@ -4543,4 +4628,50 @@ void __kmp_balanced_affinity( int tid, int nthreads ) # endif /* KMP_MIC */ +#else + // affinity not supported + +kmp_uint32 mac_skipPerLevel[7]; +kmp_uint32 mac_depth; +kmp_uint8 mac_leaf_kids; +void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { + static int first = 1; + if (first) { + const kmp_uint32 maxLevels = 7; + kmp_uint32 numPerLevel[maxLevels]; + + for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + mac_skipPerLevel[i] = 1; + } + + mac_depth = 2; + numPerLevel[0] = nproc; + + kmp_uint32 branch = 4; + if (numPerLevel[0] == 1) branch = nproc/4; + if (branch<4) branch=4; + for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width + while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0! + if (numPerLevel[d] & 1) numPerLevel[d]++; + numPerLevel[d] = numPerLevel[d] >> 1; + if (numPerLevel[d+1] == 1) mac_depth++; + numPerLevel[d+1] = numPerLevel[d+1] << 1; + } + if(numPerLevel[0] == 1) { + branch = branch >> 1; + if (branch<4) branch = 4; + } + } + + for (kmp_uint32 i=1; i<mac_depth; ++i) + mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1]; + mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1; + first=0; + } + thr_bar->depth = mac_depth; + thr_bar->base_leaf_kids = mac_leaf_kids; + thr_bar->skip_per_level = mac_skipPerLevel; +} + #endif // KMP_AFFINITY_SUPPORTED |