diff options
author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2015-09-10 20:34:32 +0000 |
---|---|---|
committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2015-09-10 20:34:32 +0000 |
commit | df4d3dd65938ce75d1c8db880c8e362de50bfa9e (patch) | |
tree | 2349f68c7884c1584d238f35755347624ba7b1ce /openmp/runtime/src | |
parent | 4bed31b9bffedfdcbf7177ceb465da7f04cc08b5 (diff) | |
download | bcm5719-llvm-df4d3dd65938ce75d1c8db880c8e362de50bfa9e.tar.gz bcm5719-llvm-df4d3dd65938ce75d1c8db880c8e362de50bfa9e.zip |
Fix depth field bug and resize() function in hierarchical barrier
This is a follow-up to the hierarchy cleanup patch.
Added some clarifying comments to hierarchy_info.
Fixed a bug where the depth field was not updated cleanly during a resize.
Fixed resize() to first check whether the existing capacity, as determined by maxLevels, can hold the new size before actually doing the full resize.
Differential Revision: http://reviews.llvm.org/D12562
llvm-svn: 247333
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- | openmp/runtime/src/kmp_affinity.cpp | 9 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_affinity.h | 29 |
2 files changed, 26 insertions, 12 deletions
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 3664751ec73..a9d1f601fca 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -32,15 +32,12 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier. if (TCR_1(machine_hierarchy.uninitialized)) machine_hierarchy.init(NULL, nproc); - // Adjust the hierarchy in case num threads exceeds original - if (nproc > machine_hierarchy.base_num_threads) - machine_hierarchy.resize(nproc); depth = machine_hierarchy.depth; KMP_DEBUG_ASSERT(depth > 0); - // The loop below adjusts the depth in the case of a resize - while (nproc > machine_hierarchy.skipPerLevel[depth-1]) - depth++; + // Adjust the hierarchy in case num threads exceeds original + if (nproc > machine_hierarchy.skipPerLevel[depth-1]) + machine_hierarchy.resize(nproc); thr_bar->depth = depth; thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1; diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index 75f21ede76e..0385307bcca 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -119,15 +119,21 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) } -/** A structure for holding machine-specific hierarchy info to be computed once at init. */ +/** A structure for holding machine-specific hierarchy info to be computed once at init. + This structure represents a mapping of threads to the actual machine hierarchy, or to + our best guess at what the hierarchy might be, for the purpose of performing an + efficient barrier. In the worst case, when there is no machine hierarchy information, + it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. 
*/ class hierarchy_info { public: /** Good default values for number of leaves and branching factor, given no affinity information. Behaves a bit like hyper barrier. */ static const kmp_uint32 maxLeaves=4; static const kmp_uint32 minBranch=4; - /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine, - etc. We don't want to get specific with nomenclature */ + /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package + or socket, packages/node, nodes/machine, etc. We don't want to get specific with + nomenclature. When the machine is oversubscribed we add levels to duplicate the + hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */ kmp_uint32 maxLevels; /** This is specifically the depth of the machine configuration hierarchy, in terms of the @@ -227,6 +233,7 @@ public: } + // Resize the hierarchy if nproc changes to something larger than before void resize(kmp_uint32 nproc) { kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); @@ -237,13 +244,23 @@ public: KMP_DEBUG_ASSERT(bool_result!=0); KMP_DEBUG_ASSERT(nproc > base_num_threads); - // Calculate new max_levels + // Calculate new maxLevels kmp_uint32 old_sz = skipPerLevel[depth-1]; - kmp_uint32 incs = 0, old_maxLevels= maxLevels; + kmp_uint32 incs = 0, old_maxLevels = maxLevels; + // First see if old maxLevels is enough to contain new size + for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) { + skipPerLevel[i] = 2*skipPerLevel[i-1]; + old_sz *= 2; + depth++; + } + if (nproc <= old_sz) // enough space already + return; + // Not enough space, need to expand hierarchy while (nproc > old_sz) { old_sz *=2; incs++; - } + depth++; + } maxLevels += incs; // Resize arrays |