/* * kmp_affinity.h -- header for affinity management */ //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.txt for details. // //===----------------------------------------------------------------------===// #ifndef KMP_AFFINITY_H #define KMP_AFFINITY_H extern int __kmp_affinity_compact; /* Affinity 'compact' value */ class Address { public: static const unsigned maxDepth = 32; unsigned labels[maxDepth]; unsigned childNums[maxDepth]; unsigned depth; unsigned leader; Address(unsigned _depth) : depth(_depth), leader(FALSE) { } Address &operator=(const Address &b) { depth = b.depth; for (unsigned i = 0; i < depth; i++) { labels[i] = b.labels[i]; childNums[i] = b.childNums[i]; } leader = FALSE; return *this; } bool operator==(const Address &b) const { if (depth != b.depth) return false; for (unsigned i = 0; i < depth; i++) if(labels[i] != b.labels[i]) return false; return true; } bool isClose(const Address &b, int level) const { if (depth != b.depth) return false; if ((unsigned)level >= depth) return true; for (unsigned i = 0; i < (depth - level); i++) if(labels[i] != b.labels[i]) return false; return true; } bool operator!=(const Address &b) const { return !operator==(b); } void print() const { unsigned i; printf("Depth: %u --- ", depth); for(i=0;ifirst); const Address *bb = (const Address *)&(((AddrUnsPair *)b) ->first); unsigned depth = aa->depth; unsigned i; KMP_DEBUG_ASSERT(depth == bb->depth); for (i = 0; i < depth; i++) { if (aa->labels[i] < bb->labels[i]) return -1; if (aa->labels[i] > bb->labels[i]) return 1; } return 0; } static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) { const Address *aa = (const Address *)&(((AddrUnsPair *)a) ->first); const Address *bb = (const Address *)&(((AddrUnsPair *)b) ->first); unsigned depth = aa->depth; unsigned i; KMP_DEBUG_ASSERT(depth == bb->depth); KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { int j = depth - i - 1; if (aa->childNums[j] < bb->childNums[j]) return -1; if (aa->childNums[j] > bb->childNums[j]) return 1; } for (; i < depth; i++) { int j = i - __kmp_affinity_compact; if (aa->childNums[j] < bb->childNums[j]) return -1; if (aa->childNums[j] > bb->childNums[j]) return 1; } return 0; } /** A structure for holding machine-specific hierarchy info to be computed once at init. This structure represents a mapping of threads to the actual machine hierarchy, or to our best guess at what the hierarchy might be, for the purpose of performing an efficient barrier. In the worst case, when there is no machine hierarchy information, it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ class hierarchy_info { public: /** Good default values for number of leaves and branching factor, given no affinity information. Behaves a bit like hyper barrier. */ static const kmp_uint32 maxLeaves=4; static const kmp_uint32 minBranch=4; /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine, etc. We don't want to get specific with nomenclature. When the machine is oversubscribed we add levels to duplicate the hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */ kmp_uint32 maxLevels; /** This is specifically the depth of the machine configuration hierarchy, in terms of the number of levels along the longest path from root to any leaf. It corresponds to the number of entries in numPerLevel if we exclude all but one trailing 1. */ kmp_uint32 depth; kmp_uint32 base_num_threads; enum init_status { initialized=0, not_initialized=1, initializing=2 }; volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress volatile kmp_int8 resizing; // 0=not resizing, 1=resizing /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a node at level i has. For example, if we have a machine with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ kmp_uint32 *numPerLevel; kmp_uint32 *skipPerLevel; void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { int hier_depth = adr2os[0].first.depth; int level = 0; for (int i=hier_depth-1; i>=0; --i) { int max = -1; for (int j=0; j max) max = next; } numPerLevel[level] = max+1; ++level; } } hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); } void init(AddrUnsPair *adr2os, int num_addrs) { kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing); if (bool_result == 0) { // Wait for initialization while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE(); return; } KMP_DEBUG_ASSERT(bool_result==1); /* Added explicit initialization of the data fields here to prevent usage of dirty value observed when static library is re-initialized multiple times (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */ depth = 1; resizing = 0; maxLevels = 7; numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); skipPerLevel = &(numPerLevel[maxLevels]); for (kmp_uint32 i=0; i=0; --i) // count non-empty levels to get depth if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' depth++; kmp_uint32 branch = minBranch; if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves; if (branch branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0! if (numPerLevel[d] & 1) numPerLevel[d]++; numPerLevel[d] = numPerLevel[d] >> 1; if (numPerLevel[d+1] == 1) depth++; numPerLevel[d+1] = numPerLevel[d+1] << 1; } if(numPerLevel[0] == 1) { branch = branch >> 1; if (branch<4) branch = minBranch; } } for (kmp_uint32 i=1; iold_sz; ++i) { skipPerLevel[i] = 2*skipPerLevel[i-1]; numPerLevel[i-1] *= 2; old_sz *= 2; depth++; } if (nproc > old_sz) { // Not enough space, need to expand hierarchy while (nproc > old_sz) { old_sz *=2; incs++; depth++; } maxLevels += incs; // Resize arrays kmp_uint32 *old_numPerLevel = numPerLevel; kmp_uint32 *old_skipPerLevel = skipPerLevel; numPerLevel = skipPerLevel = NULL; numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); skipPerLevel = &(numPerLevel[maxLevels]); // Copy old elements from old arrays for (kmp_uint32 i=0; i