summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2016-11-14 21:08:35 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2016-11-14 21:08:35 +0000
commit1cdd87adfd379d178c2aeae59b5bb05079ef67f5 (patch)
tree3ea5a3378d309ca3307495e7ccab89d7aa337d66
parent22fc0078095c5806aba560a3ec9add1e9c11e647 (diff)
downloadbcm5719-llvm-1cdd87adfd379d178c2aeae59b5bb05079ef67f5.tar.gz
bcm5719-llvm-1cdd87adfd379d178c2aeae59b5bb05079ef67f5.zip
Introduce dynamic affinity dispatch capabilities
This set of changes enables the affinity interface (either the preexisting native operating-system interface or HWLOC) to be dynamically selected at runtime initialization. The point of this change is that we were seeing performance degradations when using HWLOC. This allows the user to use the old affinity mechanisms, which on large machines (>64 cores) makes a large difference in initialization time. These changes mostly move affinity code under a small class hierarchy:

    KMPAffinity
        class Mask {}
    KMPNativeAffinity : public KMPAffinity
        class Mask : public KMPAffinity::Mask
    KMPHwlocAffinity : public KMPAffinity
        class Mask : public KMPAffinity::Mask

Since all interface functions (for both affinity and the mask implementation) are virtual, the implementation can be chosen at runtime initialization. Differential Revision: https://reviews.llvm.org/D26356 llvm-svn: 286890
-rw-r--r--openmp/runtime/cmake/LibompHandleFlags.cmake1
-rw-r--r--openmp/runtime/cmake/config-ix.cmake1
-rw-r--r--openmp/runtime/src/kmp.h368
-rw-r--r--openmp/runtime/src/kmp_affinity.cpp98
-rw-r--r--openmp/runtime/src/kmp_affinity.h464
-rw-r--r--openmp/runtime/src/kmp_ftn_cdecl.c1
-rw-r--r--openmp/runtime/src/kmp_ftn_entry.h18
-rw-r--r--openmp/runtime/src/kmp_ftn_extra.c1
-rw-r--r--openmp/runtime/src/kmp_global.c12
-rw-r--r--openmp/runtime/src/kmp_runtime.c1
-rw-r--r--openmp/runtime/src/kmp_settings.c37
-rw-r--r--openmp/runtime/src/z_Linux_util.c113
-rw-r--r--openmp/runtime/src/z_Windows_NT_util.c239
13 files changed, 633 insertions, 721 deletions
diff --git a/openmp/runtime/cmake/LibompHandleFlags.cmake b/openmp/runtime/cmake/LibompHandleFlags.cmake
index 5a7386074bc..5cc8d887fcc 100644
--- a/openmp/runtime/cmake/LibompHandleFlags.cmake
+++ b/openmp/runtime/cmake/LibompHandleFlags.cmake
@@ -28,6 +28,7 @@ function(libomp_get_c_and_cxxflags_common flags)
set(flags_local)
libomp_append(flags_local -std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG)
libomp_append(flags_local -fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG)
+ libomp_append(flags_local -fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG)
if(${LIBOMP_ENABLE_WERROR})
libomp_append(flags_local -Werror LIBOMP_HAVE_WERROR_FLAG)
endif()
diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
index c17c9f8a324..0375d211b75 100644
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -49,6 +49,7 @@ endfunction()
# Checking C, CXX, Linker Flags
check_cxx_compiler_flag(-std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG)
check_cxx_compiler_flag(-fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG)
+check_cxx_compiler_flag(-fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG)
check_c_compiler_flag("-x c++" LIBOMP_HAVE_X_CPP_FLAG)
check_c_compiler_flag(-Werror LIBOMP_HAVE_WERROR_FLAG)
check_c_compiler_flag(-Wunused-function LIBOMP_HAVE_WNO_UNUSED_FUNCTION_FLAG)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index f4b39653949..fc2facc38f3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -528,8 +528,8 @@ typedef int PACKED_REDUCTION_METHOD_T;
*/
#if KMP_AFFINITY_SUPPORTED
-# if KMP_GROUP_AFFINITY
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
+# if KMP_OS_WINDOWS
# if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
KAFFINITY Mask;
@@ -537,7 +537,11 @@ typedef struct GROUP_AFFINITY {
WORD Reserved[3];
} GROUP_AFFINITY;
# endif /* _MSC_VER < 1600 */
+# if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
+# else
+static const int __kmp_num_proc_groups = 1;
+# endif /* KMP_GROUP_AFFINITY */
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
@@ -549,285 +553,107 @@ extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
-# endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_OS_WINDOWS */
+
+# if KMP_USE_HWLOC
+extern hwloc_topology_t __kmp_hwloc_topology;
+extern int __kmp_hwloc_error;
+# endif
extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
-# if !KMP_USE_HWLOC
-# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
-# define KMP_CPU_SET_ITERATE(i,mask) \
- for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
-# endif
-
-#if KMP_USE_HWLOC
-
-extern hwloc_topology_t __kmp_hwloc_topology;
-extern int __kmp_hwloc_error;
-typedef hwloc_cpuset_t kmp_affin_mask_t;
-# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask)
-# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_AND(dest, src) hwloc_bitmap_and((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- unsigned i; \
- for(i=0;i<(unsigned)max_bit_number+1;i++) { \
- if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
- hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
- } else { \
- hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
- } \
- } \
- hwloc_bitmap_and((hwloc_cpuset_t)mask, (hwloc_cpuset_t)mask, \
- (hwloc_cpuset_t)__kmp_affin_fullMask); \
- } \
-
-# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
# define KMP_CPU_SET_ITERATE(i,mask) \
- for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
-
-# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
-# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
+ for (i = (mask)->begin(); i != (mask)->end() ; i = (mask)->next(i))
+# define KMP_CPU_SET(i,mask) (mask)->set(i)
+# define KMP_CPU_ISSET(i,mask) (mask)->is_set(i)
+# define KMP_CPU_CLR(i,mask) (mask)->clear(i)
+# define KMP_CPU_ZERO(mask) (mask)->zero()
+# define KMP_CPU_COPY(dest, src) (dest)->copy(src)
+# define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
+# define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
+# define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
+# define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len". The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
-# define KMP_CPU_ALLOC_ARRAY(arr, n) { \
- arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- arr[i] = hwloc_bitmap_alloc(); \
- } \
- }
-# define KMP_CPU_FREE_ARRAY(arr, n) { \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- hwloc_bitmap_free(arr[i]); \
- } \
- __kmp_free(arr); \
- }
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \
- arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- arr[i] = hwloc_bitmap_alloc(); \
- } \
- }
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
- unsigned i; \
- for(i=0;i<(unsigned)n;i++) { \
- hwloc_bitmap_free(arr[i]); \
- } \
- KMP_INTERNAL_FREE(arr); \
- }
-
-#else /* KMP_USE_HWLOC */
-# if KMP_OS_LINUX
-//
-// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
-// (in bytes). It should be allocated on a word boundary.
-//
-// WARNING!!! We have made the base type of the affinity mask unsigned char,
-// in order to eliminate a lot of checks that the true system mask size is
-// really a multiple of 4 bytes (on Linux* OS).
-//
-// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
-//
-
-typedef unsigned char kmp_affin_mask_t;
-
-# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
-# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-# define KMP_CPU_ZERO(mask) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] = 0; \
- } \
- }
-
-# define KMP_CPU_COPY(dest, src) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- = ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_AND(dest, src) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- &= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] \
- = ~((kmp_affin_mask_t *)(mask))[__i]; \
- } \
- KMP_CPU_AND(mask, __kmp_affin_fullMask); \
- }
-
-# define KMP_CPU_UNION(dest, src) \
- { \
- size_t __i; \
- for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- |= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# endif /* KMP_OS_LINUX */
-
-# if KMP_OS_WINDOWS
-//
-// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
-// Intel(R) 64 it is 8 bytes times the number of processor groups.
-//
-
-# if KMP_GROUP_AFFINITY
-typedef DWORD_PTR kmp_affin_mask_t;
-
-# define _KMP_CPU_SET(i,mask) \
- (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
- (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-# define KMP_CPU_SET(i,mask) \
- _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-
-# define _KMP_CPU_ISSET(i,mask) \
- (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \
- (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))
-
-# define KMP_CPU_ISSET(i,mask) \
- _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-
-# define _KMP_CPU_CLR(i,mask) \
- (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \
- ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-# define KMP_CPU_CLR(i,mask) \
- _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-# define KMP_CPU_ZERO(mask) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] = 0; \
- } \
- }
-
-# define KMP_CPU_COPY(dest, src) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- = ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_AND(dest, src) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- &= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(mask))[__i] \
- = ~((kmp_affin_mask_t *)(mask))[__i]; \
- } \
- KMP_CPU_AND(mask, __kmp_affin_fullMask); \
- }
-
-# define KMP_CPU_UNION(dest, src) \
- { \
- int __i; \
- for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
- ((kmp_affin_mask_t *)(dest))[__i] \
- |= ((kmp_affin_mask_t *)(src))[__i]; \
- } \
- }
-
-
-# else /* KMP_GROUP_AFFINITY */
-
-typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
-
-# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
-# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
-# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
-# define KMP_CPU_ZERO(mask) (*(mask) = 0)
-# define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
-# define KMP_CPU_AND(dest, src) (*(dest) &= *(src))
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)); KMP_CPU_AND(mask, __kmp_affin_fullMask)
-# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
-
-# endif /* KMP_GROUP_AFFINITY */
-
-# endif /* KMP_OS_WINDOWS */
-
-//
-// __kmp_allocate() will return memory allocated on a 4-bytes boundary.
-// after zeroing it - it takes care of those assumptions stated above.
-//
-# define KMP_CPU_ALLOC(ptr) \
- (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE(ptr) __kmp_free(ptr)
-# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
-# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
-# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len". The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) \
- ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
-# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
-# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
-
-#endif /* KMP_USE_HWLOC */
-
-// prototype after typedef of kmp_affin_mask_t
-#if KMP_GROUP_AFFINITY
-extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
+# define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i)
+# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
+# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr)
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
+# define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool)
+# define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool)
+# define __kmp_get_proc_group(mask) (mask)->get_proc_group()
+
+class KMPAffinity {
+public:
+ class Mask {
+ public:
+ void* operator new(size_t n);
+ void operator delete(void* p);
+ void* operator new[](size_t n);
+ void operator delete[](void* p);
+ virtual ~Mask() {}
+ // Set bit i to 1
+ virtual void set(int i) {}
+ // Return bit i
+ virtual bool is_set(int i) const { return false; }
+ // Set bit i to 0
+ virtual void clear(int i) {}
+ // Zero out entire mask
+ virtual void zero() {}
+ // Copy src into this mask
+ virtual void copy(const Mask* src) {}
+ // this &= rhs
+ virtual void bitwise_and(const Mask* rhs) {}
+ // this |= rhs
+ virtual void bitwise_or(const Mask* rhs) {}
+ // this = ~this
+ virtual void bitwise_not() {}
+ // API for iterating over an affinity mask
+ // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
+ virtual int begin() const { return 0; }
+ virtual int end() const { return 0; }
+ virtual int next(int previous) const { return 0; }
+ // Set the system's affinity to this affinity mask's value
+ virtual int set_system_affinity(bool abort_on_error) const { return -1; }
+ // Set this affinity mask to the current system affinity
+ virtual int get_system_affinity(bool abort_on_error) { return -1; }
+ // Only 1 DWORD in the mask should have any procs set.
+ // Return the appropriate index, or -1 for an invalid mask.
+ virtual int get_proc_group() const { return -1; }
+ };
+ void* operator new(size_t n);
+ void operator delete(void* p);
+ // Determine if affinity is capable
+ virtual void determine_capable(const char* env_var) {}
+ // Bind the current thread to os proc
+ virtual void bind_thread(int proc) {}
+ // Factory functions to allocate/deallocate a mask
+ virtual Mask* allocate_mask() { return nullptr; }
+ virtual void deallocate_mask(Mask* m) { }
+ virtual Mask* allocate_mask_array(int num) { return nullptr; }
+ virtual void deallocate_mask_array(Mask* m) { }
+ virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; }
+ static void pick_api();
+ static void destroy_api();
+ enum api_type {
+ NATIVE_OS
+#if KMP_USE_HWLOC
+ , HWLOC
#endif
+ };
+ virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; };
+private:
+ static bool picked_api;
+};
+
+typedef KMPAffinity::Mask kmp_affin_mask_t;
+extern KMPAffinity* __kmp_affinity_dispatch;
//
// Declare local char buffers with this size for printing debug and info
@@ -895,8 +721,6 @@ extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mas
extern char * __kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
-extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
-extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
extern void __kmp_affinity_bind_thread(int which);
extern kmp_affin_mask_t *__kmp_affin_fullMask;
@@ -2606,7 +2430,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
// Read/write by workers as well -----------------------------------------------------------------------
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_USE_HWLOC
+#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
// and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
// and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding.
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 93299b53e0a..f09c3cdac93 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -47,53 +47,42 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
#if KMP_AFFINITY_SUPPORTED
-//
-// Print the affinity mask to the character array in a pretty format.
-//
+bool KMPAffinity::picked_api = false;
+
+void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
+void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); }
+void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); }
+void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::operator delete(void* p) { __kmp_free(p); }
+
+void KMPAffinity::pick_api() {
+ KMPAffinity* affinity_dispatch;
+ if (picked_api)
+ return;
#if KMP_USE_HWLOC
-char *
-__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
-{
- int num_chars_to_write, num_chars_written;
- char* scan;
- KMP_ASSERT(buf_len >= 40);
+ if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+ affinity_dispatch = new KMPHwlocAffinity();
+ } else
+#endif
+ {
+ affinity_dispatch = new KMPNativeAffinity();
+ }
+ __kmp_affinity_dispatch = affinity_dispatch;
+ picked_api = true;
+}
- // bufsize of 0 just retrieves the needed buffer size.
- num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
-
- // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
- // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
- // take into account the '\0' character.
- if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
- KMP_SNPRINTF(buf, buf_len, "{<empty>}");
- } else if(num_chars_to_write < buf_len - 3) {
- // no problem fitting the mask into buf_len number of characters
- buf[0] = '{';
- // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
- num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
- buf[num_chars_written+1] = '}';
- buf[num_chars_written+2] = '\0';
- } else {
- // Need to truncate the affinity mask string and add ellipsis.
- // To do this, we first write out the '{' + str(mask)
- buf[0] = '{';
- hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask);
- // then, what we do here is go to the 7th to last character, then go backwards until we are NOT
- // on a digit then write "...}\0". This way it is a clean ellipsis addition and we don't
- // overwrite part of an affinity number. i.e., we avoid something like { 45, 67, 8...} and get
- // { 45, 67,...} instead.
- scan = buf + buf_len - 7;
- while(*scan >= '0' && *scan <= '9' && scan >= buf)
- scan--;
- *(scan+1) = '.';
- *(scan+2) = '.';
- *(scan+3) = '.';
- *(scan+4) = '}';
- *(scan+5) = '\0';
+void KMPAffinity::destroy_api() {
+ if (__kmp_affinity_dispatch != NULL) {
+ delete __kmp_affinity_dispatch;
+ __kmp_affinity_dispatch = NULL;
+ picked_api = false;
}
- return buf;
}
-#else
+
+//
+// Print the affinity mask to the character array in a pretty format.
+//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
@@ -105,12 +94,8 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
// Find first element / check for empty set.
//
size_t i;
- for (i = 0; i < KMP_CPU_SETSIZE; i++) {
- if (KMP_CPU_ISSET(i, mask)) {
- break;
- }
- }
- if (i == KMP_CPU_SETSIZE) {
+ i = mask->begin();
+ if (i == mask->end()) {
KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
while (*scan != '\0') scan++;
KMP_ASSERT(scan <= end);
@@ -120,7 +105,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
while (*scan != '\0') scan++;
i++;
- for (; i < KMP_CPU_SETSIZE; i++) {
+ for (; i != mask->end(); i = mask->next(i)) {
if (! KMP_CPU_ISSET(i, mask)) {
continue;
}
@@ -137,7 +122,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
while (*scan != '\0') scan++;
}
- if (i < KMP_CPU_SETSIZE) {
+ if (i != mask->end()) {
KMP_SNPRINTF(scan, end-scan+1, ",...");
while (*scan != '\0') scan++;
}
@@ -146,7 +131,6 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
KMP_ASSERT(scan <= end);
return buf;
}
-#endif // KMP_USE_HWLOC
void
@@ -677,7 +661,7 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
if (__kmp_affinity_type == affinity_none) {
int avail_ct = 0;
- unsigned int i;
+ int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
continue;
@@ -1031,7 +1015,7 @@ __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
}
KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
- __kmp_affinity_bind_thread(i);
+ __kmp_affinity_dispatch->bind_thread(i);
threadInfo[nApics].osId = i;
//
@@ -1547,7 +1531,7 @@ __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
}
KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
- __kmp_affinity_bind_thread(proc);
+ __kmp_affinity_dispatch->bind_thread(proc);
//
// Extract the labels for each level in the machine topology map
@@ -3705,7 +3689,7 @@ __kmp_aux_affinity_initialize(void)
const char *file_name = NULL;
int line = 0;
# if KMP_USE_HWLOC
- if (depth < 0) {
+ if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
if (__kmp_affinity_verbose) {
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
}
@@ -3947,6 +3931,7 @@ __kmp_aux_affinity_initialize(void)
# if KMP_USE_HWLOC
else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+ KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
if (__kmp_affinity_verbose) {
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
}
@@ -4233,6 +4218,7 @@ __kmp_affinity_uninitialize(void)
__kmp_hwloc_topology = NULL;
}
# endif
+ KMPAffinity::destroy_api();
}
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index 0fa1f3d06c9..fc43e8a7462 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -15,6 +15,470 @@
#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H
+#include "kmp_os.h"
+#include "kmp.h"
+
+#if KMP_AFFINITY_SUPPORTED
+#if KMP_USE_HWLOC
+class KMPHwlocAffinity: public KMPAffinity {
+public:
+ class Mask : public KMPAffinity::Mask {
+ hwloc_cpuset_t mask;
+ public:
+ Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
+ ~Mask() { hwloc_bitmap_free(mask); }
+ void set(int i) override { hwloc_bitmap_set(mask, i); }
+ bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
+ void clear(int i) override { hwloc_bitmap_clr(mask, i); }
+ void zero() override { hwloc_bitmap_zero(mask); }
+ void copy(const KMPAffinity::Mask* src) override {
+ const Mask* convert = static_cast<const Mask*>(src);
+ hwloc_bitmap_copy(mask, convert->mask);
+ }
+ void bitwise_and(const KMPAffinity::Mask* rhs) override {
+ const Mask* convert = static_cast<const Mask*>(rhs);
+ hwloc_bitmap_and(mask, mask, convert->mask);
+ }
+ void bitwise_or(const KMPAffinity::Mask * rhs) override {
+ const Mask* convert = static_cast<const Mask*>(rhs);
+ hwloc_bitmap_or(mask, mask, convert->mask);
+ }
+ void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
+ int begin() const override { return hwloc_bitmap_first(mask); }
+ int end() const override { return -1; }
+ int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
+ int get_system_affinity(bool abort_on_error) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ int get_proc_group() const override {
+ int i;
+ int group = -1;
+# if KMP_OS_WINDOWS
+ if (__kmp_num_proc_groups == 1) {
+ return 1;
+ }
+ for (i = 0; i < __kmp_num_proc_groups; i++) {
+      // On Windows, the long type is always 32 bits
+ unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
+ unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
+ if (first_32_bits == 0 && second_32_bits == 0) {
+ continue;
+ }
+ if (group >= 0) {
+ return -1;
+ }
+ group = i;
+ }
+# endif /* KMP_OS_WINDOWS */
+ return group;
+ }
+ };
+ void determine_capable(const char* var) override {
+ const hwloc_topology_support* topology_support;
+ if(__kmp_hwloc_topology == NULL) {
+ if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if(__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+ }
+ if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if(__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+ }
+ }
+ topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
+ // Is the system capable of setting/getting this thread's affinity?
+ // also, is topology discovery possible? (pu indicates ability to discover processing units)
+ // and finally, were there no errors when calling any hwloc_* API functions?
+ if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
+ topology_support->cpubind->get_thisthread_cpubind &&
+ topology_support->discovery->pu &&
+ !__kmp_hwloc_error)
+ {
+ // enables affinity according to KMP_AFFINITY_CAPABLE() macro
+ KMP_AFFINITY_ENABLE(TRUE);
+ } else {
+ // indicate that hwloc didn't work and disable affinity
+ __kmp_hwloc_error = TRUE;
+ KMP_AFFINITY_DISABLE();
+ }
+ }
+ void bind_thread(int which) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
+ KMPAffinity::Mask *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(which, mask);
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+ }
+ KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
+ void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
+ KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+ void deallocate_mask_array(KMPAffinity::Mask* array) override {
+ Mask* hwloc_array = static_cast<Mask*>(array);
+ delete[] hwloc_array;
+ }
+ KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+ Mask* hwloc_array = static_cast<Mask*>(array);
+ return &(hwloc_array[index]);
+ }
+ api_type get_api_type() const override { return HWLOC; }
+};
+#endif /* KMP_USE_HWLOC */
+
+#if KMP_OS_LINUX
+/*
+ * On some of the older OS's that we build on, these constants aren't present
+ * in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
+ * all systems of the same arch where they are defined, and they cannot
+ * change. They are set in stone forever.
+ */
+#include <sys/syscall.h>
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 241
+# elif __NR_sched_setaffinity != 241
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 242
+# elif __NR_sched_getaffinity != 242
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_AARCH64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 122
+# elif __NR_sched_setaffinity != 122
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 123
+# elif __NR_sched_getaffinity != 123
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_X86_64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 203
+# elif __NR_sched_setaffinity != 203
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 204
+# elif __NR_sched_getaffinity != 204
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_PPC64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 222
+# elif __NR_sched_setaffinity != 222
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 223
+# elif __NR_sched_getaffinity != 223
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+# else
+# error Unknown or unsupported architecture
+# endif /* KMP_ARCH_* */
+class KMPNativeAffinity : public KMPAffinity {
+ class Mask : public KMPAffinity::Mask {
+ typedef unsigned char mask_t;
+ static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
+ public:
+ mask_t* mask;
+ Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
+ ~Mask() { if (mask) __kmp_free(mask); }
+ void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
+ bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
+ void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
+ void zero() override {
+ for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+ mask[i] = 0;
+ }
+ void copy(const KMPAffinity::Mask* src) override {
+ const Mask * convert = static_cast<const Mask*>(src);
+ for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+ mask[i] = convert->mask[i];
+ }
+ void bitwise_and(const KMPAffinity::Mask* rhs) override {
+ const Mask * convert = static_cast<const Mask*>(rhs);
+ for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+ mask[i] &= convert->mask[i];
+ }
+ void bitwise_or(const KMPAffinity::Mask* rhs) override {
+ const Mask * convert = static_cast<const Mask*>(rhs);
+ for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+ mask[i] |= convert->mask[i];
+ }
+ void bitwise_not() override {
+ for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+ mask[i] = ~(mask[i]);
+ }
+ int begin() const override {
+ int retval = 0;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
+ int next(int previous) const override {
+ int retval = previous+1;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int get_system_affinity(bool abort_on_error) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ };
+ void determine_capable(const char* env_var) override {
+ __kmp_affinity_determine_capable(env_var);
+ }
+ void bind_thread(int which) override {
+ __kmp_affinity_bind_thread(which);
+ }
+ KMPAffinity::Mask* allocate_mask() override {
+ KMPNativeAffinity::Mask* retval = new Mask();
+ return retval;
+ }
+ void deallocate_mask(KMPAffinity::Mask* m) override {
+ KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
+ delete m;
+ }
+ KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+ void deallocate_mask_array(KMPAffinity::Mask* array) override {
+ Mask* linux_array = static_cast<Mask*>(array);
+ delete[] linux_array;
+ }
+ KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+ Mask* linux_array = static_cast<Mask*>(array);
+ return &(linux_array[index]);
+ }
+ api_type get_api_type() const override { return NATIVE_OS; }
+};
+#endif /* KMP_OS_LINUX */
+
+#if KMP_OS_WINDOWS
+class KMPNativeAffinity : public KMPAffinity {
+ class Mask : public KMPAffinity::Mask {
+ typedef ULONG_PTR mask_t;
+ static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
+ mask_t* mask;
+ public:
+ Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
+ ~Mask() { if (mask) __kmp_free(mask); }
+ void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
+ bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
+ void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
+ void zero() override {
+ for (size_t i=0; i<__kmp_num_proc_groups; ++i)
+ mask[i] = 0;
+ }
+ void copy(const KMPAffinity::Mask* src) override {
+ const Mask * convert = static_cast<const Mask*>(src);
+ for (size_t i=0; i<__kmp_num_proc_groups; ++i)
+ mask[i] = convert->mask[i];
+ }
+ void bitwise_and(const KMPAffinity::Mask* rhs) override {
+ const Mask * convert = static_cast<const Mask*>(rhs);
+ for (size_t i=0; i<__kmp_num_proc_groups; ++i)
+ mask[i] &= convert->mask[i];
+ }
+ void bitwise_or(const KMPAffinity::Mask* rhs) override {
+ const Mask * convert = static_cast<const Mask*>(rhs);
+ for (size_t i=0; i<__kmp_num_proc_groups; ++i)
+ mask[i] |= convert->mask[i];
+ }
+ void bitwise_not() override {
+ for (size_t i=0; i<__kmp_num_proc_groups; ++i)
+ mask[i] = ~(mask[i]);
+ }
+ int begin() const override {
+ int retval = 0;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
+ int next(int previous) const override {
+ int retval = previous+1;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ if (__kmp_num_proc_groups > 1) {
+ // Check for a valid mask.
+ GROUP_AFFINITY ga;
+ int group = get_proc_group();
+ if (group < 0) {
+ if (abort_on_error) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+ return -1;
+ }
+ // Transform the bit vector into a GROUP_AFFINITY struct
+ // and make the system call to set affinity.
+ ga.Group = group;
+ ga.Mask = mask[group];
+ ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
+
+ KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
+ if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
+ KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ } else {
+ if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
+ KMP_ERR( error ), __kmp_msg_null);
+ }
+ return error;
+ }
+ }
+ return 0;
+ }
+ int get_system_affinity(bool abort_on_error) override {
+ if (__kmp_num_proc_groups > 1) {
+ this->zero();
+ GROUP_AFFINITY ga;
+ KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
+ if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
+ return -1;
+ }
+ mask[ga.Group] = ga.Mask;
+ } else {
+ mask_t newMask, sysMask, retval;
+ if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
+ if (! retval) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
+ if (! newMask) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ }
+ *mask = retval;
+ }
+ return 0;
+ }
+ int get_proc_group() const override {
+ int group = -1;
+ if (__kmp_num_proc_groups == 1) {
+ return 1;
+ }
+ for (int i = 0; i < __kmp_num_proc_groups; i++) {
+ if (mask[i] == 0)
+ continue;
+ if (group >= 0)
+ return -1;
+ group = i;
+ }
+ return group;
+ }
+ };
+ void determine_capable(const char* env_var) override {
+ __kmp_affinity_determine_capable(env_var);
+ }
+ void bind_thread(int which) override {
+ __kmp_affinity_bind_thread(which);
+ }
+ KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
+ void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
+ KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+ void deallocate_mask_array(KMPAffinity::Mask* array) override {
+ Mask* windows_array = static_cast<Mask*>(array);
+ delete[] windows_array;
+ }
+ KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+ Mask* windows_array = static_cast<Mask*>(array);
+ return &(windows_array[index]);
+ }
+ api_type get_api_type() const override { return NATIVE_OS; }
+};
+#endif /* KMP_OS_WINDOWS */
+#endif /* KMP_AFFINITY_SUPPORTED */
+
class Address {
public:
static const unsigned maxDepth = 32;
diff --git a/openmp/runtime/src/kmp_ftn_cdecl.c b/openmp/runtime/src/kmp_ftn_cdecl.c
index 51fa1bf8954..5844bee85bc 100644
--- a/openmp/runtime/src/kmp_ftn_cdecl.c
+++ b/openmp/runtime/src/kmp_ftn_cdecl.c
@@ -14,6 +14,7 @@
#include "kmp.h"
+#include "kmp_affinity.h"
#if KMP_OS_WINDOWS
# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 4089084de4c..13501e6dc42 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -279,15 +279,13 @@ FTN_CREATE_AFFINITY_MASK( void **mask )
//
// We really only NEED serial initialization here.
//
+ kmp_affin_mask_t* mask_internals;
if ( ! TCR_4(__kmp_init_middle) ) {
__kmp_middle_initialize();
}
- # if KMP_USE_HWLOC
- *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
- # else
- *mask = kmpc_malloc( __kmp_affin_mask_size );
- # endif
- KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
+ mask_internals = __kmp_affinity_dispatch->allocate_mask();
+ KMP_CPU_ZERO( mask_internals );
+ *mask = mask_internals;
#endif
}
@@ -300,6 +298,7 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
//
// We really only NEED serial initialization here.
//
+ kmp_affin_mask_t* mask_internals;
if ( ! TCR_4(__kmp_init_middle) ) {
__kmp_middle_initialize();
}
@@ -308,11 +307,8 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
}
}
- # if KMP_USE_HWLOC
- hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
- # else
- kmpc_free( *mask );
- # endif
+ mask_internals = (kmp_affin_mask_t*)(*mask);
+ __kmp_affinity_dispatch->deallocate_mask(mask_internals);
*mask = NULL;
#endif
}
diff --git a/openmp/runtime/src/kmp_ftn_extra.c b/openmp/runtime/src/kmp_ftn_extra.c
index 1d0fb4ca13f..73c4e2d45b7 100644
--- a/openmp/runtime/src/kmp_ftn_extra.c
+++ b/openmp/runtime/src/kmp_ftn_extra.c
@@ -14,6 +14,7 @@
#include "kmp.h"
+#include "kmp_affinity.h"
#if KMP_OS_WINDOWS
# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c
index f46199d61f1..5519696f8e3 100644
--- a/openmp/runtime/src/kmp_global.c
+++ b/openmp/runtime/src/kmp_global.c
@@ -14,6 +14,7 @@
#include "kmp.h"
+#include "kmp_affinity.h"
kmp_key_t __kmp_gtid_threadprivate_key;
@@ -222,21 +223,22 @@ enum mic_type __kmp_mic_type = non_mic;
#if KMP_AFFINITY_SUPPORTED
+KMPAffinity* __kmp_affinity_dispatch = NULL;
+
# if KMP_USE_HWLOC
int __kmp_hwloc_error = FALSE;
hwloc_topology_t __kmp_hwloc_topology = NULL;
# endif
-# if KMP_GROUP_AFFINITY
-
+# if KMP_OS_WINDOWS
+# if KMP_GROUP_AFFINITY
int __kmp_num_proc_groups = 1;
-
+# endif /* KMP_GROUP_AFFINITY */
kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL;
kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL;
kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL;
kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
-
-# endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_OS_WINDOWS */
size_t __kmp_affin_mask_size = 0;
enum affinity_type __kmp_affinity_type = affinity_default;
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 3ef11ef8cc8..e439150d113 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -25,6 +25,7 @@
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
+#include "kmp_affinity.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c
index bec16657949..8576165f03b 100644
--- a/openmp/runtime/src/kmp_settings.c
+++ b/openmp/runtime/src/kmp_settings.c
@@ -23,6 +23,7 @@
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"
+#include "kmp_affinity.h"
static int __kmp_env_toPrint( char const * name, int flag );
@@ -5339,44 +5340,12 @@ __kmp_env_initialize( char const * string ) {
// affinity.
//
const char *var = "KMP_AFFINITY";
-# if KMP_USE_HWLOC
- if(__kmp_hwloc_topology == NULL) {
- if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
- }
- if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
- }
- }
-# endif
+ KMPAffinity::pick_api();
if ( __kmp_affinity_type == affinity_disabled ) {
KMP_AFFINITY_DISABLE();
}
else if ( ! KMP_AFFINITY_CAPABLE() ) {
-# if KMP_USE_HWLOC
- const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
- // Is the system capable of setting/getting this thread's affinity?
- // also, is topology discovery possible? (pu indicates ability to discover processing units)
- // and finally, were there no errors when calling any hwloc_* API functions?
- if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
- topology_support->cpubind->get_thisthread_cpubind &&
- topology_support->discovery->pu &&
- !__kmp_hwloc_error)
- {
- // enables affinity according to KMP_AFFINITY_CAPABLE() macro
- KMP_AFFINITY_ENABLE(TRUE);
- } else {
- // indicate that hwloc didn't work and disable affinity
- __kmp_hwloc_error = TRUE;
- KMP_AFFINITY_DISABLE();
- }
-# else
- __kmp_affinity_determine_capable( var );
-# endif // KMP_USE_HWLOC
+ __kmp_affinity_dispatch->determine_capable(var);
if ( ! KMP_AFFINITY_CAPABLE() ) {
if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
&& ( __kmp_affinity_type != affinity_default )
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index b3c3e80083b..389f4d543db 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -22,6 +22,7 @@
#include "kmp_io.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
+#include "kmp_affinity.h"
#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
# include <alloca.h>
@@ -113,118 +114,6 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
* Affinity support
*/
-/*
- * On some of the older OS's that we build on, these constants aren't present
- * in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
- * all systems of the same arch where they are defined, and they cannot change.
- * stone forever.
- */
-
-# if KMP_ARCH_X86 || KMP_ARCH_ARM
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 241
-# elif __NR_sched_setaffinity != 241
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 242
-# elif __NR_sched_getaffinity != 242
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_AARCH64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 122
-# elif __NR_sched_setaffinity != 122
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 123
-# elif __NR_sched_getaffinity != 123
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_X86_64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 203
-# elif __NR_sched_setaffinity != 203
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 204
-# elif __NR_sched_getaffinity != 204
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-# elif KMP_ARCH_PPC64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 222
-# elif __NR_sched_setaffinity != 222
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 223
-# elif __NR_sched_getaffinity != 223
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-
-
-# else
-# error Unknown or unsupported architecture
-
-# endif /* KMP_ARCH_* */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
-#if KMP_USE_HWLOC
- int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
- int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
-
-#if KMP_USE_HWLOC
- int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
- int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-}
-
void
__kmp_affinity_bind_thread( int which )
{
diff --git a/openmp/runtime/src/z_Windows_NT_util.c b/openmp/runtime/src/z_Windows_NT_util.c
index 53b8eb8d746..8daa7d51993 100644
--- a/openmp/runtime/src/z_Windows_NT_util.c
+++ b/openmp/runtime/src/z_Windows_NT_util.c
@@ -18,6 +18,7 @@
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
+#include "kmp_affinity.h"
/* This code is related to NtQuerySystemInformation() function. This function
is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
@@ -127,9 +128,7 @@ HMODULE ntdll = NULL;
/* End of NtQuerySystemInformation()-related code */
-#if KMP_GROUP_AFFINITY
static HMODULE kernel32 = NULL;
-#endif /* KMP_GROUP_AFFINITY */
/* ----------------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------------------- */
@@ -542,227 +541,9 @@ __kmp_gtid_get_specific()
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
-#if KMP_GROUP_AFFINITY
-
-//
-// Only 1 DWORD in the mask should have any procs set.
-// Return the appropriate index, or -1 for an invalid mask.
-//
-int
-__kmp_get_proc_group( kmp_affin_mask_t const *mask )
-{
- int i;
- int group = -1;
- for (i = 0; i < __kmp_num_proc_groups; i++) {
-#if KMP_USE_HWLOC
- // On windows, the long type is always 32 bits
- unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2);
- unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1);
- if (first_32_bits == 0 && second_32_bits == 0) {
- continue;
- }
-#else
- if (mask[i] == 0) {
- continue;
- }
-#endif
- if (group >= 0) {
- return -1;
- }
- group = i;
- }
- return group;
-}
-
-#endif /* KMP_GROUP_AFFINITY */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
-#if KMP_USE_HWLOC
- int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-#else
-# if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- //
- // Check for a valid mask.
- //
- GROUP_AFFINITY ga;
- int group = __kmp_get_proc_group( mask );
- if (group < 0) {
- if (abort_on_error) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- return -1;
- }
-
- //
- // Transform the bit vector into a GROUP_AFFINITY struct
- // and make the system call to set affinity.
- //
- ga.Group = group;
- ga.Mask = mask[group];
- ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
-
- KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
- if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
- }
- }
- else
-
-# endif /* KMP_GROUP_AFFINITY */
-
- {
- if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
- }
- }
-#endif /* KMP_USE_HWLOC */
- return 0;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
-#if KMP_USE_HWLOC
- int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG( FatalSysError ),
- KMP_ERR( error ),
- __kmp_msg_null
- );
- }
- return error;
-#else /* KMP_USE_HWLOC */
-# if KMP_GROUP_AFFINITY
-
- if (__kmp_num_proc_groups > 1) {
- KMP_CPU_ZERO(mask);
- GROUP_AFFINITY ga;
- KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
-
- if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
-
- if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups)
- || (ga.Mask == 0)) {
- return -1;
- }
-
- mask[ga.Group] = ga.Mask;
- }
- else
-
-# endif /* KMP_GROUP_AFFINITY */
-
- {
- kmp_affin_mask_t newMask, sysMask, retval;
-
- if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
- retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
- if (! retval) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- return error;
- }
- newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
- if (! newMask) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error),
- __kmp_msg_null
- );
- }
- }
- *mask = retval;
- }
-#endif /* KMP_USE_HWLOC */
- return 0;
-}
-
void
__kmp_affinity_bind_thread( int proc )
{
-#if KMP_USE_HWLOC
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
- KMP_CPU_SET(proc, mask);
- __kmp_set_system_affinity(mask, TRUE);
- KMP_CPU_FREE_FROM_STACK(mask);
-#else /* KMP_USE_HWLOC */
-# if KMP_GROUP_AFFINITY
-
if (__kmp_num_proc_groups > 1) {
//
// Form the GROUP_AFFINITY struct directly, rather than filling
@@ -787,18 +568,14 @@ __kmp_affinity_bind_thread( int proc )
);
}
}
+ } else {
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(proc, mask);
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
}
- else
-
-# endif /* KMP_GROUP_AFFINITY */
-
- {
- kmp_affin_mask_t mask;
- KMP_CPU_ZERO(&mask);
- KMP_CPU_SET(proc, &mask);
- __kmp_set_system_affinity(&mask, TRUE);
- }
-#endif /* KMP_USE_HWLOC */
}
void
OpenPOWER on IntegriCloud