summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src/kmp_affinity.cpp
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2015-06-22 15:59:18 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2015-06-22 15:59:18 +0000
commit7f09a98ab138886454dfdf123a9fe9811ac20fb3 (patch)
treeff9b37080a8e636d4b33daa55044356b28a65628 /openmp/runtime/src/kmp_affinity.cpp
parent06407c032038c6c3c3776fcb9ad8b06729e0c1d8 (diff)
downloadbcm5719-llvm-7f09a98ab138886454dfdf123a9fe9811ac20fb3.tar.gz
bcm5719-llvm-7f09a98ab138886454dfdf123a9fe9811ac20fb3.zip
Allow machine hierarchy expansion
This fix allows the machine hierarchy to be expanded in case it needs to handle more threads. It adds a resize function to accomplish this. Differential Revision: http://reviews.llvm.org/D9900 llvm-svn: 240292
Diffstat (limited to 'openmp/runtime/src/kmp_affinity.cpp')
-rw-r--r--openmp/runtime/src/kmp_affinity.cpp88
1 files changed, 78 insertions, 10 deletions
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 32a04465fe8..5fcee142c04 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -312,12 +312,18 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
return 0;
}
-/** A structure for holding machine-specific hierarchy info to be computed once at init. */
+/** A structure for holding machine-specific hierarchy info to be computed once at init.
+ This structure represents a mapping of threads to the actual machine hierarchy, or to
+ our best guess at what the hierarchy might be, for the purpose of performing an
+ efficient barrier. In the worst case, when there is no machine hierarchy information,
+ it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
- /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
- etc. We don't want to get specific with nomenclature */
- static const kmp_uint32 maxLevels=7;
+ /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
+ or socket, packages/node, nodes/machine, etc. We don't want to get specific with
+ nomenclature. When the machine is oversubscribed we add levels to duplicate the
+ hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
+ kmp_uint32 maxLevels;
/** This is specifically the depth of the machine configuration hierarchy, in terms of the
number of levels along the longest path from root to any leaf. It corresponds to the
@@ -325,12 +331,13 @@ public:
kmp_uint32 depth;
kmp_uint32 base_num_threads;
volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
+ volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
/** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
- kmp_uint32 numPerLevel[maxLevels];
- kmp_uint32 skipPerLevel[maxLevels];
+ kmp_uint32 *numPerLevel;
+ kmp_uint32 *skipPerLevel;
void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
int hier_depth = adr2os[0].first.depth;
@@ -346,7 +353,11 @@ public:
}
}
- hierarchy_info() : depth(1), uninitialized(1) {}
+ hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}
+
+ // TO FIX: This destructor causes a segfault in the library at shutdown.
+ //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
+
void init(AddrUnsPair *adr2os, int num_addrs)
{
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
@@ -356,10 +367,14 @@ public:
}
KMP_DEBUG_ASSERT(bool_result==1);
- /* Added explicit initialization of the depth here to prevent usage of dirty value
+ /* Added explicit initialization of the data fields here to prevent usage of dirty value
observed when static library is re-initialized multiple times (e.g. when
non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
depth = 1;
+ resizing = 0;
+ maxLevels = 7;
+ numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
numPerLevel[i] = 1;
skipPerLevel[i] = 1;
@@ -406,6 +421,56 @@ public:
uninitialized = 0; // One writer
}
+
+ void resize(kmp_uint32 nproc)
+ {
+ kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+ if (bool_result == 0) { // Someone else is resizing
+ while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
+ return;
+ }
+ KMP_DEBUG_ASSERT(bool_result!=0);
+ KMP_DEBUG_ASSERT(nproc > base_num_threads);
+
+ // Calculate new max_levels
+ kmp_uint32 old_sz = skipPerLevel[depth-1];
+ kmp_uint32 incs = 0, old_maxLevels= maxLevels;
+ while (nproc > old_sz) {
+ old_sz *=2;
+ incs++;
+ }
+ maxLevels += incs;
+
+ // Resize arrays
+ kmp_uint32 *old_numPerLevel = numPerLevel;
+ kmp_uint32 *old_skipPerLevel = skipPerLevel;
+ numPerLevel = skipPerLevel = NULL;
+ numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
+
+ // Copy old elements from old arrays
+ for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = old_numPerLevel[i];
+ skipPerLevel[i] = old_skipPerLevel[i];
+ }
+
+ // Init new elements in arrays to 1
+ for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Free old arrays
+ __kmp_free(old_numPerLevel);
+
+ // Fill in oversubscription levels of hierarchy
+ for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
+ skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+ base_num_threads = nproc;
+ resizing = 0; // One writer
+
+ }
};
static hierarchy_info machine_hierarchy;
@@ -415,11 +480,14 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
// The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
if (TCR_1(machine_hierarchy.uninitialized))
machine_hierarchy.init(NULL, nproc);
+ // Adjust the hierarchy in case num threads exceeds original
+ if (nproc > machine_hierarchy.base_num_threads)
+ machine_hierarchy.resize(nproc);
depth = machine_hierarchy.depth;
KMP_DEBUG_ASSERT(depth > 0);
- // The loop below adjusts the depth in the case of oversubscription
- while (nproc > machine_hierarchy.skipPerLevel[depth-1] && depth<machine_hierarchy.maxLevels-1)
+ // The loop below adjusts the depth in the case of a resize
+ while (nproc > machine_hierarchy.skipPerLevel[depth-1])
depth++;
thr_bar->depth = depth;
OpenPOWER on IntegriCloud