Diffstat (limited to 'openmp/runtime/src/kmp.h')
-rw-r--r--  openmp/runtime/src/kmp.h  |  640
1 file changed, 294 insertions(+), 346 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index f5dd10f8baa..6daf9735601 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1,8 +1,8 @@
/*! \file */
/*
* kmp.h -- KPTS runtime header file.
- * $Revision: 42816 $
- * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
+ * $Revision: 43473 $
+ * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
*/
@@ -28,8 +28,6 @@
/* Defines for OpenMP 3.0 tasking and auto scheduling */
-#if OMP_30_ENABLED
-
# ifndef KMP_STATIC_STEAL_ENABLED
# define KMP_STATIC_STEAL_ENABLED 1
# endif
@@ -56,8 +54,6 @@
#define TASK_EXPLICIT 1
#define TASK_IMPLICIT 0
-#endif // OMP_30_ENABLED
-
#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR
@@ -79,6 +75,10 @@
#include "kmp_os.h"
+#if KMP_STATS_ENABLED
+class kmp_stats_list;
+#endif
+
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif
@@ -125,6 +125,24 @@
#define USE_FAST_MEMORY 3
#endif
+#ifndef KMP_NESTED_HOT_TEAMS
+# define KMP_NESTED_HOT_TEAMS 0
+# define USE_NESTED_HOT_ARG(x)
+#else
+# if KMP_NESTED_HOT_TEAMS
+# if OMP_40_ENABLED
+# define USE_NESTED_HOT_ARG(x) ,x
+# else
// Nested hot teams feature depends on OMP 4.0; disable it for earlier versions
+# undef KMP_NESTED_HOT_TEAMS
+# define KMP_NESTED_HOT_TEAMS 0
+# define USE_NESTED_HOT_ARG(x)
+# endif
+# else
+# define USE_NESTED_HOT_ARG(x)
+# endif
+#endif
+
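A minimal sketch of what USE_NESTED_HOT_ARG() buys: an extra thread argument appears in signatures only when nested hot teams are compiled in. Using the __kmp_allocate_team prototype from later in this diff:

    /* With KMP_NESTED_HOT_TEAMS=1 (which implies OMP_40_ENABLED), the declaration */
    extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
                                             kmp_proc_bind_t proc_bind,
                                             kmp_internal_control_t *new_icvs,
                                             int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
    /* expands to "..., int argc, kmp_info_t *thr );"; with the feature disabled   */
    /* the macro is empty, so it expands to "..., int argc );".                    */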
// Assume using BGET compare_exchange instruction instead of lock by default.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
@@ -459,15 +477,6 @@ typedef int PACKED_REDUCTION_METHOD_T;
/*
* Only Linux* OS and Windows* OS support thread affinity.
*/
-#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
-# define KMP_AFFINITY_SUPPORTED 1
-#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64
-// affinity not supported
-# define KMP_AFFINITY_SUPPORTED 0
-#else
-# error "Unknown or unsupported OS"
-#endif
-
#if KMP_AFFINITY_SUPPORTED
extern size_t __kmp_affin_mask_size;
@@ -540,11 +549,14 @@ typedef unsigned char kmp_affin_mask_t;
# if KMP_ARCH_X86_64
+// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
+# if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
- KAFFINITY mask;
- WORD group;
- WORD reserved[3];
+ KAFFINITY Mask;
+ WORD Group;
+ WORD Reserved[3];
} GROUP_AFFINITY;
+# endif
typedef DWORD_PTR kmp_affin_mask_t;
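The capitalized member names match the GROUP_AFFINITY structure that the Windows SDK itself declares from VS2010 on, so the same code compiles against either definition. A small usage sketch (Windows-only, processor-group affinity):

    GROUP_AFFINITY ga = { 0 };
    ga.Mask  = (KAFFINITY)0xF;  /* logical processors 0-3 ...   */
    ga.Group = 0;               /* ... within processor group 0 */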
@@ -798,7 +810,6 @@ extern unsigned int __kmp_place_core_offset;
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : */ /* TODO remove this check, it is redundant */ \
__kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
@@ -865,6 +876,9 @@ extern unsigned int __kmp_place_core_offset;
#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
+#define KMP_MIN_STKPADDING (0)
+#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
+
#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */
#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */
#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
@@ -952,12 +966,14 @@ extern unsigned int __kmp_place_core_offset;
#elif KMP_OS_LINUX
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#elif KMP_OS_DARWIN || KMP_OS_FREEBSD
-/* TODO: tune for OS */
+#elif KMP_OS_DARWIN
+/* TODO: tune for KMP_OS_DARWIN */
+# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
+#elif KMP_OS_FREEBSD
+/* TODO: tune for KMP_OS_FREEBSD */
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
-#else
-# error "Unknown or unsupported OS"
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -968,12 +984,14 @@ struct kmp_cpuid {
kmp_uint32 edx;
};
extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
-# if KMP_MIC
+# if KMP_ARCH_X86
+ extern void __kmp_x86_pause( void );
+# elif KMP_MIC
static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); };
# else
- extern void __kmp_x86_pause( void );
+ static void __kmp_x86_pause( void ) { _mm_pause(); };
# endif
-# define KMP_CPU_PAUSE() __kmp_x86_pause()
+# define KMP_CPU_PAUSE() __kmp_x86_pause()
#elif KMP_ARCH_PPC64
# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
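KMP_CPU_PAUSE() is the per-architecture spin-wait hint: _mm_pause() on plain x86/x86_64, the heavier _mm_delay_32(100) on KMP_MIC, with corresponding priority hints elsewhere. A hypothetical caller (this helper is not part of kmp.h) would use it in a tight polling loop:

    /* Hypothetical helper: poll a flag, hinting the CPU that this is a
       spin-wait so it can save power and avoid pipeline flushes.       */
    static inline void spin_until_nonzero( volatile int *flag ) {
        while ( *flag == 0 )
            KMP_CPU_PAUSE();
    }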
@@ -985,7 +1003,7 @@ extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }
-#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_static_yield( (cond) ); }
+#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
// Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround,
// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.
@@ -1533,6 +1551,9 @@ typedef struct kmp_disp {
dispatch_private_info_t *th_disp_buffer;
kmp_int32 th_disp_index;
void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
+#if KMP_USE_INTERNODE_ALIGNMENT
+ char more_padding[INTERNODE_CACHE_LINE];
+#endif
} kmp_disp_t;
/* ------------------------------------------------------------------------ */
@@ -1557,6 +1578,12 @@ typedef struct kmp_disp {
# error "Barrier unused bit must be smaller than barrier bump bit"
#endif
+// Constants for release barrier wait state: currently, hierarchical only
+#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
+#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
+#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
+#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
+#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
enum barrier_type {
bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
@@ -1576,16 +1603,58 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
bp_linear_bar = 0, /* Single level (degenerate) tree */
bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
- bp_last_bar = 3 /* Placeholder to mark the end */
+ bp_hierarchical_bar = 3, /* Machine hierarchy tree */
+ bp_last_bar = 4 /* Placeholder to mark the end */
} kmp_bar_pat_e;
+# define KMP_BARRIER_ICV_PUSH 1
+
+/* Record for holding the values of the internal controls stack records */
+typedef struct kmp_internal_control {
+ int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
+ kmp_int8 nested; /* internal control for nested parallelism (per thread) */
+ kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
+ kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
+ int blocktime; /* internal control for blocktime */
+ int bt_intervals; /* internal control for blocktime intervals */
+ int nproc; /* internal control for #threads for next parallel region (per thread) */
+ int max_active_levels; /* internal control for max_active_levels */
+ kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
+#if OMP_40_ENABLED
+ kmp_proc_bind_t proc_bind; /* internal control for affinity */
+#endif // OMP_40_ENABLED
+ struct kmp_internal_control *next;
+} kmp_internal_control_t;
+
+static inline void
+copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
+ *dst = *src;
+}
+
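Usage sketch for copy_icvs (the pattern is grounded in structures from this header; exact call sites vary): the master seeds a worker's implicit-task ICVs with a plain struct copy:

    copy_icvs( &team->t.t_implicit_task_taskdata[ tid ].td_icvs, new_icvs );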
/* Thread barrier needs volatile barrier fields */
typedef struct KMP_ALIGN_CACHE kmp_bstate {
- volatile kmp_uint b_arrived; /* STATE => task reached synch point. */
- #if (KMP_PERF_V19 == KMP_ON)
- KMP_ALIGN_CACHE
- #endif
- volatile kmp_uint b_go; /* STATE => task should proceed. */
+ // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
+ // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
+ // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache-line
+ // stores in the hierarchical barrier.
+ kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
+ // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
+ volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
+ KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
+ kmp_uint32 *skip_per_level;
+ kmp_uint32 my_level;
+ kmp_int32 parent_tid;
+ kmp_uint32 old_tid;
+ kmp_uint32 depth;
+ struct kmp_bstate *parent_bar;
+ kmp_team_t *team;
+ kmp_uint64 leaf_state;
+ kmp_uint32 nproc;
+ kmp_uint8 base_leaf_kids;
+ kmp_uint8 leaf_kids;
+ kmp_uint8 offset;
+ kmp_uint8 wait_flag;
+ kmp_uint8 use_oncore_barrier;
} kmp_bstate_t;
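One illustrative way to state the layout intent described above (this assert is not in the header, and offsetof requires <stddef.h>): b_go should land in the same cache line as th_fixed_icvs so a single NGO store can cover both:

    KMP_BUILD_ASSERT( offsetof( kmp_bstate_t, b_go ) < CACHE_LINE );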
union KMP_ALIGN_CACHE kmp_barrier_union {
@@ -1698,7 +1767,6 @@ typedef union KMP_ALIGN_CACHE kmp_desc {
typedef struct kmp_local {
volatile int this_construct; /* count of single's encountered by thread */
- volatile int last_construct; /* cache for team's count used by old algorithm */
void *reduce_data;
#if KMP_USE_BGET
void *bget_data;
@@ -1721,151 +1789,54 @@ typedef struct kmp_local {
} kmp_local_t;
-/* Record for holding the values of the internal controls stack records */
-typedef struct KMP_ALIGN_CACHE kmp_internal_control {
- int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
- int nested; /* internal control for nested parallelism (per thread) */
- int dynamic; /* internal control for dynamic adjustment of threads (per thread) */
- int nproc; /* internal control for # of threads for next parallel region (per thread) */
- int blocktime; /* internal control for blocktime */
- int bt_intervals; /* internal control for blocktime intervals */
- int bt_set; /* internal control for whether blocktime is explicitly set */
-#if OMP_30_ENABLED
- int max_active_levels; /* internal control for max_active_levels */
- kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
-#endif // OMP_30_ENABLED
-#if OMP_40_ENABLED
- kmp_proc_bind_t proc_bind; /* internal control for affinity */
-#endif // OMP_40_ENABLED
- struct kmp_internal_control *next;
-
-} kmp_internal_control_t;
-
-#if OMP_30_ENABLED
-static inline void
-copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
- *dst = *src;
-}
-#endif // OMP_30_ENABLED
-
-#if OMP_30_ENABLED
-
- #define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
- #define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
- #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
+#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
+#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
+#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
- #define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
- #define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
- #define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
- #define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
+#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
+#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
+#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
+#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
- #define set__blocktime_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
+#define set__blocktime_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
- #define set__bt_intervals_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
+#define set__bt_intervals_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
- #define set__bt_set_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
+#define set__bt_set_team( xteam, xtid, xval ) \
+ ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
+#define set__nested( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
+#define get__nested( xthread ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
- #define set__nested( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_threads[0] ->th.th_current_task->td_icvs.nested ) = \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nested ) = \
- (xval) )
- #define get__nested( xthread ) \
- ( ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nested ) \
- ? (FTN_TRUE) : (FTN_FALSE) )
+#define set__dynamic( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
+#define get__dynamic( xthread ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
- #define set__dynamic( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_threads[0] ->th.th_current_task->td_icvs.dynamic ) = \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.dynamic ) = \
- (xval) )
- #define get__dynamic( xthread ) \
- ( ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.dynamic ) \
- ? (FTN_TRUE) : (FTN_FALSE) )
+#define set__nproc( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
- #define set__nproc( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_threads[0] ->th.th_current_task->td_icvs.nproc ) = \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nproc ) = \
- (xval) )
+#define set__max_active_levels( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
- #define set__nproc_p( xthread, xval ) \
- ( \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nproc ) = \
- (xval) )
-
- #define set__max_active_levels( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_threads[0] ->th.th_current_task->td_icvs.max_active_levels ) = \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.max_active_levels ) = \
- (xval) )
-
- #define set__sched( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_threads[0] ->th.th_current_task->td_icvs.sched ) = \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.sched ) = \
- (xval) )
+#define set__sched( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
#if OMP_40_ENABLED
- #define set__proc_bind( xthread, xval ) \
- ( \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.proc_bind ) = \
- (xval) )
-
- #define get__proc_bind( xthread ) \
- ( (xthread)->th.th_team ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.proc_bind )
+#define set__proc_bind( xthread, xval ) \
+ ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
+#define get__proc_bind( xthread ) \
+ ( (xthread)->th.th_current_task->td_icvs.proc_bind )
#endif /* OMP_40_ENABLED */
-#else
-
- #define get__blocktime( xteam, xtid ) ((xteam)->t.t_set_blocktime[ (xtid)])
- #define get__bt_set( xteam, xtid ) ((xteam)->t.t_set_bt_set[ (xtid)])
- #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_set_bt_intervals[(xtid)])
-
- #define set__nested( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_set_nested[0] ) = \
- ( (xthread)->th.th_team->t.t_set_nested[((xthread)->th.th_info.ds.ds_tid)] ) = \
- (xval) )
- #define get__nested( xthread ) \
- ( ( (xthread)->th.th_team->t.t_set_nested[((xthread)->th.th_info.ds.ds_tid)] ) \
- ? (FTN_TRUE) : (FTN_FALSE) )
-
- #define set__dynamic( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_set_dynamic[0] ) = \
- ( (xthread)->th.th_team->t.t_set_dynamic[((xthread)->th.th_info.ds.ds_tid)] ) = \
- (xval) )
- #define get__dynamic( xthread ) \
- ( ( (xthread)->th.th_team->t.t_set_dynamic[((xthread)->th.th_info.ds.ds_tid)] ) \
- ? (FTN_TRUE) : (FTN_FALSE) )
-
- #define set__nproc( xthread, xval ) \
- ( ( (xthread)->th.th_serial_team->t.t_set_nproc[0] ) = \
- ( (xthread)->th.th_team->t.t_set_nproc[((xthread)->th.th_info.ds.ds_tid)] ) = \
- (xval) )
-
- #define set__nproc_p( xthread, xval ) \
- ( ( (xthread)->th.th_team->t.t_set_nproc[((xthread)->th.th_info.ds.ds_tid)] ) = (xval) )
-
- #define set__blocktime_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_set_blocktime[(xtid)] ) = (xval) )
- #define set__bt_intervals_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_set_bt_intervals[(xtid)] ) = (xval) )
-
- #define set__bt_set_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_set_bt_set[(xtid)] ) = (xval) )
-
- #define get__nested_2(xteam,xtid) ( (xteam)->t.t_set_nested[(xtid)] )
- #define get__dynamic_2(xteam,xtid) ( (xteam)->t.t_set_dynamic[(xtid)] )
- #define get__nproc_2(xteam,xtid) ( (xteam)->t.t_set_nproc[(xtid)] )
- #define get__sched_2(xteam,xtid) ( (xteam)->t.t_set_sched[(xtid)] )
-
-
-#endif
-
-#if OMP_30_ENABLED
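The net effect of this hunk: the accessors now read and write one place, the current task's ICV block, instead of mirroring every store into both the serial team's and the current team's copies. Usage sketch (gtid assumed valid):

    kmp_info_t *thr = __kmp_threads[ gtid ];
    if ( get__dynamic( thr ) == FTN_FALSE )
        set__nproc( thr, 4 );   /* request 4 threads for the next parallel region */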
/* ------------------------------------------------------------------------ */
// OpenMP tasking data structures
//
@@ -1931,7 +1902,7 @@ typedef struct kmp_dephash_entry kmp_dephash_entry_t;
typedef struct kmp_depend_info {
kmp_intptr_t base_addr;
- size_t len;
+ size_t len;
struct {
bool in:1;
bool out:1;
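Sketch of how a compiler might fill one dependence record for depend(inout: x); this assumes the bitfield member (truncated in this hunk) is named flags, as in the full header:

    kmp_depend_info_t dep;
    dep.base_addr = (kmp_intptr_t)&x;
    dep.len       = sizeof( x );
    dep.flags.in  = 1;   /* inout sets both bits */
    dep.flags.out = 1;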
@@ -1947,13 +1918,13 @@ typedef struct kmp_base_depnode {
kmp_depnode_list_t * successors;
kmp_task_t * task;
- kmp_lock_t lock;
+ kmp_lock_t lock;
#if KMP_SUPPORT_GRAPH_OUTPUT
kmp_uint32 id;
#endif
- volatile kmp_int32 npredecessors;
+ volatile kmp_int32 npredecessors;
volatile kmp_int32 nrefs;
} kmp_base_depnode_t;
@@ -1965,8 +1936,8 @@ union KMP_ALIGN_CACHE kmp_depnode {
struct kmp_dephash_entry {
kmp_intptr_t addr;
- kmp_depnode_t * last_out;
- kmp_depnode_list_t * last_ins;
+ kmp_depnode_t * last_out;
+ kmp_depnode_list_t * last_ins;
kmp_dephash_entry_t * next_in_bucket;
};
@@ -2039,7 +2010,7 @@ struct kmp_taskdata { /* aligned during dynamic
ident_t * td_taskwait_ident;
kmp_uint32 td_taskwait_counter;
kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
- kmp_internal_control_t td_icvs; /* Internal control variables for the task */
+ KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
#if OMP_40_ENABLED
@@ -2060,7 +2031,7 @@ KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
// Data for task team but per thread
typedef struct kmp_base_thread_data {
kmp_info_p * td_thr; // Pointer back to thread info
- // Used only in __kmp_execute_tasks, maybe not avail until task is queued?
+ // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
kmp_uint32 td_deque_head; // Head of deque (will wrap)
@@ -2099,6 +2070,10 @@ typedef struct kmp_base_task_team {
volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
KMP_ALIGN_CACHE
+#if KMP_USE_INTERNODE_ALIGNMENT
+ kmp_int32 tt_padme[INTERNODE_CACHE_LINE/sizeof(kmp_int32)];
+#endif
+
volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */
/* (not incl. master) */
kmp_int32 tt_state; /* alternating 0/1 for task team identification */
@@ -2111,8 +2086,6 @@ union KMP_ALIGN_CACHE kmp_task_team {
char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
};
-#endif // OMP_30_ENABLED
-
#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
// Free lists keep same-size free memory slots for fast memory allocation routines
typedef struct kmp_free_list {
@@ -2121,6 +2094,20 @@ typedef struct kmp_free_list {
void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
} kmp_free_list_t;
#endif
+#if KMP_NESTED_HOT_TEAMS
+// Hot teams array keeps hot teams and their sizes for given thread.
+// Hot teams are not put in the teams pool, and they don't put threads in the threads pool.
+typedef struct kmp_hot_team_ptr {
+ kmp_team_p *hot_team; // pointer to hot_team of given nesting level
+ kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
+} kmp_hot_team_ptr_t;
+#endif
+#if OMP_40_ENABLED
+typedef struct kmp_teams_size {
+ kmp_int32 nteams; // number of teams in a league
+ kmp_int32 nth; // number of threads in each team of the league
+} kmp_teams_size_t;
+#endif
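Sketch of the intended use (an assumption consistent with the th_hot_teams field added below, described as an array of hot teams): index by nesting level, bounded by __kmp_hot_teams_max_level:

    #if KMP_NESTED_HOT_TEAMS
    kmp_team_p *hot = ( thr->th.th_hot_teams != NULL && level < __kmp_hot_teams_max_level )
                      ? thr->th.th_hot_teams[ level ].hot_team : NULL;
    #endif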
/* ------------------------------------------------------------------------ */
// OpenMP thread data structures
@@ -2146,7 +2133,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
kmp_info_p *th_team_master; /* the team's master thread */
int th_team_serialized; /* team is serialized */
#if OMP_40_ENABLED
- microtask_t th_team_microtask; /* save entry address for teams construct */
+ microtask_t th_teams_microtask; /* save entry address for teams construct */
int th_teams_level; /* save initial level of teams construct */
/* it is 0 on device but may be any on host */
#endif
@@ -2158,21 +2145,21 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
int th_team_bt_intervals;
int th_team_bt_set;
- kmp_internal_control_t th_fixed_icvs; /* Initial ICVs for the thread */
-
#if KMP_AFFINITY_SUPPORTED
kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
#endif
-
/*
* The data set by the master at reinit, then R/W by the worker
*/
KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
+#if KMP_NESTED_HOT_TEAMS
+ kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
+#endif
#if OMP_40_ENABLED
- int th_set_nth_teams; /* number of threads in parallel nested in teams construct */
kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
+ kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
# if KMP_AFFINITY_SUPPORTED
int th_current_place; /* place currently bound to */
int th_new_place; /* place to bind to in par reg */
@@ -2182,6 +2169,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
#endif
#if USE_ITT_BUILD
kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
+ kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
kmp_uint64 th_frame_time; /* frame timestamp */
kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */
#endif /* USE_ITT_BUILD */
@@ -2200,24 +2188,18 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
/* while awaiting queuing lock acquire */
- volatile kmp_uint32 *th_sleep_loc;
+ volatile void *th_sleep_loc; // this points at a kmp_flag<T>
-/*
- * Two variables used for consistency check - struct cons_header *th_cons and inte th_first moved below
- * from here in order to avoid performance regression
-*/
ident_t *th_ident;
unsigned th_x; // Random number generator data
unsigned th_a; // Random number generator data
-#if OMP_30_ENABLED
/*
* Tasking-related data for the thread
*/
kmp_task_team_t * th_task_team; // Task team struct
kmp_taskdata_t * th_current_task; // Innermost Task being executed
kmp_uint8 th_task_state; // alternating 0/1 for task team identification
-#endif // OMP_30_ENABLED
/*
* More stuff for keeping track of active/sleeping threads
@@ -2229,8 +2211,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
// 32 bits for TCR/TCW
- struct cons_header * th_cons;
- int th_first;
+ struct cons_header * th_cons; // used for consistency check
/*
* Add the synchronizing data which is cache aligned and padded.
@@ -2259,6 +2240,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
kmp_itt_mark_t th_itt_mark_single;
// alignment ???
#endif /* USE_ITT_BUILD */
+#if KMP_STATS_ENABLED
+ kmp_stats_list* th_stats;
+#endif
} kmp_base_info_t;
typedef union KMP_ALIGN_CACHE kmp_info {
@@ -2291,154 +2275,89 @@ typedef int (*launch_t)( int gtid );
/* Minimum number of ARGV entries to malloc if necessary */
#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
-#if KMP_MIC && OMP_30_ENABLED
-# define KMP_BARRIER_ICV_PULL 1
+// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we
+// have supported at least 96 bytes. Using a larger value for more space between the master write/worker
+// read section and read/write by all section seems to buy more performance on EPCC PARALLEL.
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
#else
-# define KMP_BARRIER_ICV_PULL 0
-#endif
-
-#if (KMP_PERF_V106 == KMP_ON)
-//
-// Set up how many argv pointers will fit in cache lines containing
-// *t_inline_argv. Historically, we have supported at least 96 bytes.
-//
-// Using a larger value for more space between the master write/worker read
-// section and read/write by all section seems to buy more performance
-// on EPCC PARALLEL.
-//
-//# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE )
-# if KMP_BARRIER_ICV_PULL
-# define KMP_INLINE_ARGV_BYTES 192
-//# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 5 * KMP_PTR_SKIP + 10 * sizeof(int) + sizeof(kmp_int64) ) % CACHE_LINE ) )
-# elif KMP_ARCH_X86 || KMP_ARCH_X86_64
-# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
-# else
-# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
-# endif
-# define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
+# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
#endif
+#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
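Worked example, assuming a 64-bit x86 build with CACHE_LINE=64 and KMP_PTR_SKIP=8:

    /* leftover = ( 3*8 + 2*4 + 2*1 + 2 + 4 ) % 64 = 40
       KMP_INLINE_ARGV_BYTES   = 4*64 - 40 = 216 bytes
       KMP_INLINE_ARGV_ENTRIES = 216 / 8   = 27 inline argv slots */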
typedef struct KMP_ALIGN_CACHE kmp_base_team {
-/*
- * Synchronization Data
- */
- KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
+ // Synchronization Data ---------------------------------------------------------------------------------
+ KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
kmp_balign_team_t t_bar[ bs_last_barrier ];
-
- /* count of single directive encountered by team */
- volatile int t_construct;
- kmp_lock_t t_single_lock; /* team specific lock */
-
-/*
- * Master only
- */
- KMP_ALIGN_CACHE int t_master_tid; /* tid of master in parent team */
- int t_master_this_cons; /* "this_construct" single counter of master in parent team */
- int t_master_last_cons; /* "last_construct" single counter of master in parent team */
- ident_t *t_ident; /* if volatile, have to change too much other crud to volatile too */
- kmp_team_p *t_parent; /* parent team */
- kmp_team_p *t_next_pool; /* next free team in the team pool */
- kmp_disp_t *t_dispatch; /* thread's dispatch data */
-#if OMP_30_ENABLED
- kmp_task_team_t *t_task_team; /* Task team struct */
-#endif /* OMP_30_ENABLED */
+ volatile int t_construct; // count of single directive encountered by team
+ kmp_lock_t t_single_lock; // team specific lock
+
+ // Master only -----------------------------------------------------------------------------------------
+ KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
+ int t_master_this_cons; // "this_construct" single counter of master in parent team
+ ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
+ kmp_team_p *t_parent; // parent team
+ kmp_team_p *t_next_pool; // next free team in the team pool
+ kmp_disp_t *t_dispatch; // thread's dispatch data
+ kmp_task_team_t *t_task_team; // Task team struct
#if OMP_40_ENABLED
- kmp_proc_bind_t t_proc_bind; /* bind type for par region */
+ kmp_proc_bind_t t_proc_bind; // bind type for par region
#endif // OMP_40_ENABLED
+#if USE_ITT_BUILD
+ kmp_uint64 t_region_time; // region begin timestamp
+#endif /* USE_ITT_BUILD */
-/*
- * Master write, workers read
- */
- KMP_ALIGN_CACHE
- void **t_argv;
+ // Master write, workers read --------------------------------------------------------------------------
+ KMP_ALIGN_CACHE void **t_argv;
int t_argc;
-#if (KMP_PERF_V106 == KMP_ON)
- /* swap cache lines for t_nproc and t_max_argc */
- int t_nproc; /* number of threads in team */
-#else
- int t_max_argc;
-#endif
+ int t_nproc; // number of threads in team
microtask_t t_pkfn;
- launch_t t_invoke; /* procedure to launch the microtask */
-
+ launch_t t_invoke; // procedure to launch the microtask
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
kmp_int8 t_fp_control_saved;
kmp_int8 t_pad2b;
- kmp_int16 t_x87_fpu_control_word; /* FP control regs */
+ kmp_int16 t_x87_fpu_control_word; // FP control regs
kmp_uint32 t_mxcsr;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-#if (KMP_PERF_V106 == KMP_ON)
void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
-#endif
-#if (KMP_PERF_V19 == KMP_ON)
- KMP_ALIGN_CACHE
-#endif
- kmp_info_t **t_threads;
-#if (KMP_PERF_V106 == KMP_ON)
- /* swap cache lines for t_nproc and t_max_argc */
+ KMP_ALIGN_CACHE kmp_info_t **t_threads;
int t_max_argc;
-#else
- int t_nproc; /* number of threads in team */
-#endif
- int t_max_nproc; /* maximum threads this team can handle (this is dynamicly expandable) */
- int t_serialized; /* levels deep of serialized teams */
- dispatch_shared_info_t *t_disp_buffer; /* buffers for dispatch system */
+ int t_max_nproc; // maximum threads this team can handle (dynamically expandable)
+ int t_serialized; // levels deep of serialized teams
+ dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
int t_id; // team's id, assigned by debugger.
-#if OMP_30_ENABLED
- int t_level; /* nested parallel level */
- int t_active_level; /* nested active parallel level */
- kmp_r_sched_t t_sched; /* run-time schedule for the team */
-#endif // OMP_30_ENABLED
+ int t_level; // nested parallel level
+ int t_active_level; // nested active parallel level
+ kmp_r_sched_t t_sched; // run-time schedule for the team
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- int t_first_place; /* first & last place in */
- int t_last_place; /* parent thread's partition. */
- /* Restore these values to */
- /* master after par region. */
+ int t_first_place; // first & last place in parent thread's partition.
+ int t_last_place; // Restore these values to master after par region.
#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
#if KMP_MIC
- int t_size_changed; /* team size was changed?: 0 - no, 1 - yes, -1 - changed via omp_set_num_threads() call */
+ int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
#endif
-/*
- * Read/write by workers as well
- */
+ // Read/write by workers as well -----------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // Using CACHE_LINE=64 reduces memory footprint,
- // but causes a big perf regression of epcc 'parallel' and 'barrier' on fxe256lin01.
- // This extra padding serves to fix the performance of epcc 'parallel' and 'barrier' when CACHE_LINE=64.
- // TODO: investigate more and get rid if this padding.
+ // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
+ // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
+ // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
char dummy_padding[1024];
#endif
- KMP_ALIGN_CACHE
-#if OMP_30_ENABLED
- kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
-#else
- // Internal control variables for current thread team
- // TODO Convert these fields to an array of kmp_internal_control_t which simplifies parameter passing
- // and also prevents performance degradation due to false sharing when all threads set a control var
- int *t_set_nproc; /* internal control for # of threads for next
- parallel region (per thread) */
- int *t_set_nested; /* internal control for nested parallelism (per thread) */
- int *t_set_dynamic; /* internal control for dynamic adjustment of threads (per thread) */
- int *t_set_blocktime; /* internal control for blocktime */
- int *t_set_bt_intervals; /* internal control for blocktime intervals */
- int *t_set_bt_set; /* internal control for whether blocktime is explicitly set */
-#endif // OMP_30_ENABLED
-
- kmp_internal_control_t *t_control_stack_top; /* internal control stack for additional nested teams.
- for SERIALIZED teams nested 2 or more levels deep */
+ KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
+ kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
+ // for SERIALIZED teams nested 2 or more levels deep
#if OMP_40_ENABLED
- kmp_int32 t_cancel_request; /* typed flag to store request state of cancellation */
+ kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
#endif
-
- int t_master_active;/* save on fork, restore on join */
- kmp_taskq_t t_taskq; /* this team's task queue */
- void *t_copypriv_data; /* team specific pointer to copyprivate data array */
+ int t_master_active; // save on fork, restore on join
+ kmp_taskq_t t_taskq; // this team's task queue
+ void *t_copypriv_data; // team specific pointer to copyprivate data array
kmp_uint32 t_copyin_counter;
#if USE_ITT_BUILD
- void *t_stack_id; /* team specific stack stitching id (for ittnotify) */
+ void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
} kmp_base_team_t;
@@ -2543,6 +2462,7 @@ extern int __kmp_debug_count; /* Counter for number of lines printe
extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
/* end rotating debug buffer */
+#ifdef KMP_DEBUG
extern int __kmp_par_range; /* +1 => only go par for constructs in range */
#define KMP_PAR_RANGE_ROUTINE_LEN 1024
@@ -2551,6 +2471,7 @@ extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
extern int __kmp_par_range_lb;
extern int __kmp_par_range_ub;
+#endif
/* For printing out dynamic storage map for threads and teams */
extern int __kmp_storage_map; /* True means print storage map for threads and teams */
@@ -2607,14 +2528,13 @@ extern enum library_type __kmp_library;
extern enum sched_type __kmp_sched; /* default runtime scheduling */
extern enum sched_type __kmp_static; /* default static scheduling method */
extern enum sched_type __kmp_guided; /* default guided scheduling method */
-#if OMP_30_ENABLED
extern enum sched_type __kmp_auto; /* default auto scheduling method */
-#endif // OMP_30_ENABLED
extern int __kmp_chunk; /* default runtime chunk size */
extern size_t __kmp_stksize; /* stack size per thread */
extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
extern size_t __kmp_stkoffset; /* stack offset per thread */
+extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
@@ -2629,7 +2549,7 @@ extern int __kmp_generate_warnings; /* should we issue warnings? */
extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
#ifdef DEBUG_SUSPEND
-extern int __kmp_suspend_count; /* count inside __kmp_suspend() */
+extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
#endif
extern kmp_uint32 __kmp_yield_init;
@@ -2693,9 +2613,11 @@ extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control r
extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-#if OMP_30_ENABLED
extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
-#endif // OMP_30_ENABLED
+#if KMP_NESTED_HOT_TEAMS
+extern int __kmp_hot_teams_mode;
+extern int __kmp_hot_teams_max_level;
+#endif
# if KMP_OS_LINUX
extern enum clock_function_type __kmp_clock_function;
@@ -2833,8 +2755,6 @@ static inline kmp_info_t * __kmp_entry_thread()
return __kmp_threads[gtid];
}
-#if OMP_30_ENABLED
-
extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
extern int __kmp_get_max_active_levels( int gtid );
extern int __kmp_get_ancestor_thread_num( int gtid, int level );
@@ -2842,8 +2762,6 @@ extern int __kmp_get_team_size( int gtid, int level );
extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
-#endif // OMP_30_ENABLED
-
extern unsigned short __kmp_get_random( kmp_info_t * thread );
extern void __kmp_init_random( kmp_info_t * thread );
@@ -2888,8 +2806,6 @@ extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num
#endif
extern void __kmp_yield( int cond );
-extern void __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
- enum kmp_mem_fence_type fetchadd_fence );
extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
@@ -2956,11 +2872,28 @@ extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker );
extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj );
-extern void __kmp_wait_sleep( kmp_info_t *this_thr, volatile kmp_uint *spinner, kmp_uint checker, kmp_int final_spin
+class kmp_flag_32;
+class kmp_flag_64;
+class kmp_flag_oncore;
+extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif /* USE_ITT_BUILD */
-);
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_32(kmp_flag_32 *flag);
+extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_64(kmp_flag_64 *flag);
+extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
+#if USE_ITT_BUILD
+ , void * itt_sync_obj
+#endif
+ );
+extern void __kmp_release_oncore(kmp_flag_oncore *flag);
+
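Usage sketch for the new typed flag interface; the kmp_flag_64 constructor is only forward-declared here, so its signature is an assumption taken from kmp_wait_release.h, and KMP_BARRIER_STATE_BUMP comes from elsewhere in this header:

    kmp_flag_64 flag( &thr_bar->b_go, KMP_BARRIER_STATE_BUMP );
    __kmp_wait_64( this_thr, &flag, FALSE
    #if USE_ITT_BUILD
                   , itt_sync_obj
    #endif
                   );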
extern void __kmp_infinite_loop( void );
extern void __kmp_cleanup( void );
@@ -3003,9 +2936,10 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
extern void __kmp_balanced_affinity( int tid, int team_size );
-
#endif /* KMP_AFFINITY_SUPPORTED */
+extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
+
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
extern int __kmp_futex_determine_capable( void );
@@ -3035,8 +2969,12 @@ extern void __kmp_reap_monitor( kmp_info_t *th );
extern void __kmp_reap_worker( kmp_info_t *th );
extern void __kmp_terminate_thread( int gtid );
-extern void __kmp_suspend( int th_gtid, volatile kmp_uint *spinner, kmp_uint checker );
-extern void __kmp_resume( int target_gtid, volatile kmp_uint *spinner );
+extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
+extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
+extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
+extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
+extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
+extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
extern void __kmp_elapsed( double * );
extern void __kmp_elapsed_tick( double * );
@@ -3062,19 +3000,14 @@ extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
kmp_proc_bind_t proc_bind,
kmp_internal_control_t *new_icvs,
- int argc );
-#elif OMP_30_ENABLED
-extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
- kmp_internal_control_t *new_icvs,
- int argc );
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
#else
extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
- int new_set_nproc, int new_set_dynamic, int new_set_nested,
- int new_set_blocktime, int new_bt_intervals, int new_bt_set,
- int argc );
-#endif // OMP_30_ENABLED
+ kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
+#endif // OMP_40_ENABLED
extern void __kmp_free_thread( kmp_info_t * );
-extern void __kmp_free_team( kmp_root_t *, kmp_team_t * );
+extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
/* ------------------------------------------------------------------------ */
@@ -3094,7 +3027,16 @@ extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
-extern int __kmp_fork_call( ident_t *loc, int gtid, int exec_master,
+/*!
+ * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
+ */
+enum fork_context_e
+{
+ fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */
+ fork_context_intel, /**< Called from Intel generated code. */
+ fork_context_last
+};
+extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_ARM || KMP_ARCH_X86_64) && KMP_OS_LINUX
@@ -3110,6 +3052,7 @@ extern void __kmp_join_call( ident_t *loc, int gtid
#endif
);
+extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
extern int __kmp_invoke_task_func( int gtid );
@@ -3120,7 +3063,7 @@ extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_th
KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
#if OMP_40_ENABLED
extern int __kmp_invoke_teams_master( int gtid );
-extern void __kmp_teams_master( microtask_t microtask, int gtid );
+extern void __kmp_teams_master( int gtid );
#endif
extern void __kmp_save_internal_controls( kmp_info_t * thread );
extern void __kmp_user_set_library (enum library_type arg);
@@ -3135,7 +3078,6 @@ void ompc_set_nested( int flag );
void ompc_set_dynamic( int flag );
void ompc_set_num_threads( int arg );
-#if OMP_30_ENABLED
extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
kmp_team_t *team, int tid );
extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
@@ -3145,12 +3087,25 @@ extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
kmp_team_t *team, int tid, int set_curr_task );
-extern int __kmp_execute_tasks( kmp_info_t *thread, kmp_int32 gtid, volatile kmp_uint *spinner,
- kmp_uint checker, int final_spin, int *thread_finished,
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+ int *thread_finished,
#if USE_ITT_BUILD
- void * itt_sync_obj,
+ void * itt_sync_obj,
#endif /* USE_ITT_BUILD */
- int c );
+ kmp_int32 is_constrained);
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+ int *thread_finished,
+#if USE_ITT_BUILD
+ void * itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+ int *thread_finished,
+#if USE_ITT_BUILD
+ void * itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
+
extern void __kmp_reap_task_teams( void );
extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
extern void __kmp_wait_to_unref_task_teams( void );
@@ -3163,8 +3118,6 @@ extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
);
extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
-#endif // OMP_30_ENABLED
-
extern int __kmp_is_address_mapped( void *addr );
extern kmp_uint64 __kmp_hardware_timestamp(void);
@@ -3259,7 +3212,6 @@ KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid
/* ------------------------------------------------------------------------ */
-#if OMP_30_ENABLED
/*
* OMP 3.0 tasking interface routines
*/
@@ -3288,9 +3240,9 @@ void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *tas
#endif // TASK_UNUSED
/* ------------------------------------------------------------------------ */
-#endif // OMP_30_ENABLED
#if OMP_40_ENABLED
+
KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
@@ -3301,13 +3253,13 @@ KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int
kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task );
-#endif
+extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
-#if OMP_40_ENABLED
KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
+
#endif
/*
@@ -3404,8 +3356,6 @@ kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_si
# define KMPC_CONVENTION
#endif
-#if OMP_30_ENABLED
-
#ifndef __OMP_H
typedef enum omp_sched_t {
omp_sched_static = 1,
@@ -3424,8 +3374,6 @@ KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_ma
KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
-#endif // OMP_30_ENABLED
-
KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);