diff options
Diffstat (limited to 'openmp/runtime/src/kmp_affinity.cpp')
-rw-r--r-- | openmp/runtime/src/kmp_affinity.cpp | 484 |
1 files changed, 28 insertions, 456 deletions
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 2e91c14e8d8..3664751ec73 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -18,6 +18,34 @@
 #include "kmp_io.h"
 #include "kmp_str.h"
 #include "kmp_wrapper_getpid.h"
+#include "kmp_affinity.h"
+
+// Store the real or imagined machine hierarchy here
+static hierarchy_info machine_hierarchy;
+
+void __kmp_cleanup_hierarchy() {
+    machine_hierarchy.fini();
+}
+
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
+    kmp_uint32 depth;
+    // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
+    if (TCR_1(machine_hierarchy.uninitialized))
+        machine_hierarchy.init(NULL, nproc);
+    // Adjust the hierarchy in case num threads exceeds original
+    if (nproc > machine_hierarchy.base_num_threads)
+        machine_hierarchy.resize(nproc);
+
+    depth = machine_hierarchy.depth;
+    KMP_DEBUG_ASSERT(depth > 0);
+    // The loop below adjusts the depth in the case of a resize
+    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
+        depth++;
+
+    thr_bar->depth = depth;
+    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+}
 
 #if KMP_AFFINITY_SUPPORTED
 
@@ -108,393 +136,6 @@ __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
     }
 }
 
-
-//
-// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
-// functions.
-//
-// The icc codegen emits sections with extremely long names, of the form
-// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
-// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
-// some sort of memory corruption or table overflow that is triggered by
-// these long strings. I checked the latest version of the linker -
-// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
-// fixed.
-//
-// Unfortunately, my attempts to reproduce it in a smaller example have
-// failed - I'm not sure what the prospects are of getting it fixed
-// properly - but we need a reproducer smaller than all of libomp.
-//
-// Work around the problem by avoiding inline constructors in such builds.
-// We do this for all platforms, not just Linux* OS - non-inline functions are
-// more debuggable and provide better coverage into than inline functions.
-// Use inline functions in shipping libs, for performance.
-//
-
-# if !defined(KMP_DEBUG) && !defined(COVER)
-
-class Address {
-public:
-    static const unsigned maxDepth = 32;
-    unsigned labels[maxDepth];
-    unsigned childNums[maxDepth];
-    unsigned depth;
-    unsigned leader;
-    Address(unsigned _depth)
-      : depth(_depth), leader(FALSE) {
-    }
-    Address &operator=(const Address &b) {
-        depth = b.depth;
-        for (unsigned i = 0; i < depth; i++) {
-            labels[i] = b.labels[i];
-            childNums[i] = b.childNums[i];
-        }
-        leader = FALSE;
-        return *this;
-    }
-    bool operator==(const Address &b) const {
-        if (depth != b.depth)
-            return false;
-        for (unsigned i = 0; i < depth; i++)
-            if(labels[i] != b.labels[i])
-                return false;
-        return true;
-    }
-    bool isClose(const Address &b, int level) const {
-        if (depth != b.depth)
-            return false;
-        if ((unsigned)level >= depth)
-            return true;
-        for (unsigned i = 0; i < (depth - level); i++)
-            if(labels[i] != b.labels[i])
-                return false;
-        return true;
-    }
-    bool operator!=(const Address &b) const {
-        return !operator==(b);
-    }
-};
-
-class AddrUnsPair {
-public:
-    Address first;
-    unsigned second;
-    AddrUnsPair(Address _first, unsigned _second)
-      : first(_first), second(_second) {
-    }
-    AddrUnsPair &operator=(const AddrUnsPair &b)
-    {
-        first = b.first;
-        second = b.second;
-        return *this;
-    }
-};
-
-# else
-
-class Address {
-public:
-    static const unsigned maxDepth = 32;
-    unsigned labels[maxDepth];
-    unsigned childNums[maxDepth];
-    unsigned depth;
-    unsigned leader;
-    Address(unsigned _depth);
-    Address &operator=(const Address &b);
-    bool operator==(const Address &b) const;
-    bool isClose(const Address &b, int level) const;
-    bool operator!=(const Address &b) const;
-};
-
-Address::Address(unsigned _depth)
-{
-    depth = _depth;
-    leader = FALSE;
-}
-
-Address &Address::operator=(const Address &b) {
-    depth = b.depth;
-    for (unsigned i = 0; i < depth; i++) {
-        labels[i] = b.labels[i];
-        childNums[i] = b.childNums[i];
-    }
-    leader = FALSE;
-    return *this;
-}
-
-bool Address::operator==(const Address &b) const {
-    if (depth != b.depth)
-        return false;
-    for (unsigned i = 0; i < depth; i++)
-        if(labels[i] != b.labels[i])
-            return false;
-    return true;
-}
-
-bool Address::isClose(const Address &b, int level) const {
-    if (depth != b.depth)
-        return false;
-    if ((unsigned)level >= depth)
-        return true;
-    for (unsigned i = 0; i < (depth - level); i++)
-        if(labels[i] != b.labels[i])
-            return false;
-    return true;
-}
-
-bool Address::operator!=(const Address &b) const {
-    return !operator==(b);
-}
-
-class AddrUnsPair {
-public:
-    Address first;
-    unsigned second;
-    AddrUnsPair(Address _first, unsigned _second);
-    AddrUnsPair &operator=(const AddrUnsPair &b);
-};
-
-AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
-  : first(_first), second(_second)
-{
-}
-
-AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
-{
-    first = b.first;
-    second = b.second;
-    return *this;
-}
-
-# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
-
-
-static int
-__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
-{
-    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
-      ->first);
-    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
-      ->first);
-    unsigned depth = aa->depth;
-    unsigned i;
-    KMP_DEBUG_ASSERT(depth == bb->depth);
-    for (i = 0; i < depth; i++) {
-        if (aa->labels[i] < bb->labels[i]) return -1;
-        if (aa->labels[i] > bb->labels[i]) return 1;
-    }
-    return 0;
-}
-
-
-static int
-__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
-{
-    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
-      ->first);
-    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
-      ->first);
-    unsigned depth = aa->depth;
-    unsigned i;
-    KMP_DEBUG_ASSERT(depth == bb->depth);
-    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
-    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
-    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
-        int j = depth - i - 1;
-        if (aa->childNums[j] < bb->childNums[j]) return -1;
-        if (aa->childNums[j] > bb->childNums[j]) return 1;
-    }
-    for (; i < depth; i++) {
-        int j = i - __kmp_affinity_compact;
-        if (aa->childNums[j] < bb->childNums[j]) return -1;
-        if (aa->childNums[j] > bb->childNums[j]) return 1;
-    }
-    return 0;
-}
-
-/** A structure for holding machine-specific hierarchy info to be computed once at init.
-    This structure represents a mapping of threads to the actual machine hierarchy, or to
-    our best guess at what the hierarchy might be, for the purpose of performing an
-    efficient barrier. In the worst case, when there is no machine hierarchy information,
-    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
-class hierarchy_info {
-public:
-    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
-    or socket, packages/node, nodes/machine, etc. We don't want to get specific with
-    nomenclature. When the machine is oversubscribed we add levels to duplicate the
-    hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
-    kmp_uint32 maxLevels;
-
-    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
-        number of levels along the longest path from root to any leaf. It corresponds to the
-        number of entries in numPerLevel if we exclude all but one trailing 1. */
-    kmp_uint32 depth;
-    kmp_uint32 base_num_threads;
-    volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
-    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
-
-    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
-        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
-        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
-    kmp_uint32 *numPerLevel;
-    kmp_uint32 *skipPerLevel;
-
-    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
-        int hier_depth = adr2os[0].first.depth;
-        int level = 0;
-        for (int i=hier_depth-1; i>=0; --i) {
-            int max = -1;
-            for (int j=0; j<num_addrs; ++j) {
-                int next = adr2os[j].first.childNums[i];
-                if (next > max) max = next;
-            }
-            numPerLevel[level] = max+1;
-            ++level;
-        }
-    }
-
-    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}
-
-    // TO FIX: This destructor causes a segfault in the library at shutdown.
-    //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
-
-    void init(AddrUnsPair *adr2os, int num_addrs)
-    {
-        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
-        if (bool_result == 0) { // Wait for initialization
-            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
-            return;
-        }
-        KMP_DEBUG_ASSERT(bool_result==1);
-
-        /* Added explicit initialization of the data fields here to prevent usage of dirty value
-           observed when static library is re-initialized multiple times (e.g. when
-           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
-        depth = 1;
-        resizing = 0;
-        maxLevels = 7;
-        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
-        skipPerLevel = &(numPerLevel[maxLevels]);
-        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = 1;
-            skipPerLevel[i] = 1;
-        }
-
-        // Sort table by physical ID
-        if (adr2os) {
-            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
-            deriveLevels(adr2os, num_addrs);
-        }
-        else {
-            numPerLevel[0] = 4;
-            numPerLevel[1] = num_addrs/4;
-            if (num_addrs%4) numPerLevel[1]++;
-        }
-
-        base_num_threads = num_addrs;
-        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
-            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
-                depth++;
-
-        kmp_uint32 branch = 4;
-        if (numPerLevel[0] == 1) branch = num_addrs/4;
-        if (branch<4) branch=4;
-        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
-            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
-                if (numPerLevel[d] & 1) numPerLevel[d]++;
-                numPerLevel[d] = numPerLevel[d] >> 1;
-                if (numPerLevel[d+1] == 1) depth++;
-                numPerLevel[d+1] = numPerLevel[d+1] << 1;
-            }
-            if(numPerLevel[0] == 1) {
-                branch = branch >> 1;
-                if (branch<4) branch = 4;
-            }
-        }
-
-        for (kmp_uint32 i=1; i<depth; ++i)
-            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
-        // Fill in hierarchy in the case of oversubscription
-        for (kmp_uint32 i=depth; i<maxLevels; ++i)
-            skipPerLevel[i] = 2*skipPerLevel[i-1];
-
-        uninitialized = 0; // One writer
-
-    }
-
-    void resize(kmp_uint32 nproc)
-    {
-        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
-        if (bool_result == 0) { // Someone else is resizing
-            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
-            return;
-        }
-        KMP_DEBUG_ASSERT(bool_result!=0);
-        KMP_DEBUG_ASSERT(nproc > base_num_threads);
-
-        // Calculate new max_levels
-        kmp_uint32 old_sz = skipPerLevel[depth-1];
-        kmp_uint32 incs = 0, old_maxLevels= maxLevels;
-        while (nproc > old_sz) {
-            old_sz *=2;
-            incs++;
-        }
-        maxLevels += incs;
-
-        // Resize arrays
-        kmp_uint32 *old_numPerLevel = numPerLevel;
-        kmp_uint32 *old_skipPerLevel = skipPerLevel;
-        numPerLevel = skipPerLevel = NULL;
-        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
-        skipPerLevel = &(numPerLevel[maxLevels]);
-
-        // Copy old elements from old arrays
-        for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = old_numPerLevel[i];
-            skipPerLevel[i] = old_skipPerLevel[i];
-        }
-
-        // Init new elements in arrays to 1
-        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = 1;
-            skipPerLevel[i] = 1;
-        }
-
-        // Free old arrays
-        __kmp_free(old_numPerLevel);
-
-        // Fill in oversubscription levels of hierarchy
-        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
-            skipPerLevel[i] = 2*skipPerLevel[i-1];
-
-        base_num_threads = nproc;
-        resizing = 0; // One writer
-
-    }
-};
-
-static hierarchy_info machine_hierarchy;
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
-    kmp_uint32 depth;
-    // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
-    if (TCR_1(machine_hierarchy.uninitialized))
-        machine_hierarchy.init(NULL, nproc);
-    // Adjust the hierarchy in case num threads exceeds original
-    if (nproc > machine_hierarchy.base_num_threads)
-        machine_hierarchy.resize(nproc);
-
-    depth = machine_hierarchy.depth;
-    KMP_DEBUG_ASSERT(depth > 0);
-    // The loop below adjusts the depth in the case of a resize
-    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
-        depth++;
-
-    thr_bar->depth = depth;
-    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
-    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
-}
-
 //
 // When sorting by labels, __kmp_affinity_assign_child_nums() must first be
 // called to renumber the labels from [0..n] and place them into the child_num
@@ -4683,73 +4324,4 @@ void __kmp_balanced_affinity( int tid, int nthreads )
     }
 }
 
-#else
-  // affinity not supported
-
-static const kmp_uint32 noaff_maxLevels=7;
-kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
-kmp_uint32 noaff_depth;
-kmp_uint8 noaff_leaf_kids;
-kmp_int8 noaff_uninitialized=1;
-
-void noaff_init(int nprocs)
-{
-    kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
-    if (result == 0) return; // Already initialized
-    else if (result == 2) { // Someone else is initializing
-        while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
-        return;
-    }
-    KMP_DEBUG_ASSERT(result==1);
-
-    kmp_uint32 numPerLevel[noaff_maxLevels];
-    noaff_depth = 1;
-    for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-        numPerLevel[i] = 1;
-        noaff_skipPerLevel[i] = 1;
-    }
-
-    numPerLevel[0] = 4;
-    numPerLevel[1] = nprocs/4;
-    if (nprocs%4) numPerLevel[1]++;
-
-    for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
-        if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
-            noaff_depth++;
-
-    kmp_uint32 branch = 4;
-    if (numPerLevel[0] == 1) branch = nprocs/4;
-    if (branch<4) branch=4;
-    for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
-        while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
-            if (numPerLevel[d] & 1) numPerLevel[d]++;
-            numPerLevel[d] = numPerLevel[d] >> 1;
-            if (numPerLevel[d+1] == 1) noaff_depth++;
-            numPerLevel[d+1] = numPerLevel[d+1] << 1;
-        }
-        if(numPerLevel[0] == 1) {
-            branch = branch >> 1;
-            if (branch<4) branch = 4;
-        }
-    }
-
-    for (kmp_uint32 i=1; i<noaff_depth; ++i)
-        noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
-    // Fill in hierarchy in the case of oversubscription
-    for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
-        noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
-    noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
-    noaff_uninitialized = 0; // One writer
-
-}
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
-    if (noaff_uninitialized)
-        noaff_init(nproc);
-
-    thr_bar->depth = noaff_depth;
-    thr_bar->base_leaf_kids = noaff_leaf_kids;
-    thr_bar->skip_per_level = noaff_skipPerLevel;
-}
-
 #endif // KMP_AFFINITY_SUPPORTED