summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src/kmp_affinity.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/runtime/src/kmp_affinity.cpp')
-rw-r--r--openmp/runtime/src/kmp_affinity.cpp46
1 files changed, 29 insertions, 17 deletions
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 6f4c7c1910b..00ba5c14a76 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
int pkgLevel = 0;
int coreLevel = 1;
int threadLevel = 2;
- nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
- nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
- __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
if (! KMP_AFFINITY_CAPABLE())
{
@@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
+ __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
if (__kmp_affinity_verbose) {
@@ -423,23 +422,34 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+ //
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return if affinity is not enabled.
+ //
+
hwloc_obj_t pu;
hwloc_obj_t core;
hwloc_obj_t socket;
int nActiveThreads = 0;
int socket_identifier = 0;
+ // re-calculate globals to count only accessible resources
+ __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
socket != NULL;
socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
socket_identifier++)
{
int core_identifier = 0;
+ int num_active_cores = 0;
for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
core_identifier++)
{
int pu_identifier = 0;
+ int num_active_threads = 0;
for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
@@ -447,7 +457,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
{
Address addr(3);
if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
- continue;
+ continue; // skip inactive (inaccessible) unit
KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
addr.labels[0] = socket_identifier; // package
@@ -455,13 +465,26 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
nActiveThreads++;
+ ++num_active_threads; // count active threads per core
+ }
+ if (num_active_threads) { // were there any active threads on the core?
+ ++__kmp_ncores; // count total active cores
+ ++num_active_cores; // count active cores per socket
+ if (num_active_threads > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = num_active_threads; // calc maximum
}
}
+ if (num_active_cores) { // were there any active cores on the socket?
+ ++nPackages; // count total active packages
+ if (num_active_cores > nCoresPerPkg)
+ nCoresPerPkg = num_active_cores; // calc maximum
+ }
}
//
// If there's only one thread context to bind to, return now.
//
+ KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
KMP_ASSERT(nActiveThreads > 0);
if (nActiveThreads == 1) {
__kmp_ncores = nPackages = 1;
@@ -514,20 +537,9 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
//
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
- __kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
-
- //
// Check to see if the machine topology is uniform
//
- unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
- unsigned ncores = __kmp_ncores;
- unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
- unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+ unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
//
// Print the machine topology summary.
@@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
- __kmp_str_buf_print(&buf, "%d", npackages);
+ __kmp_str_buf_print(&buf, "%d", nPackages);
//for (level = 1; level <= pkgLevel; level++) {
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
// }
OpenPOWER on IntegriCloud