author | Jim Cownie <james.h.cownie@intel.com> | 2013-12-23 17:28:57 +0000
---|---|---
committer | Jim Cownie <james.h.cownie@intel.com> | 2013-12-23 17:28:57 +0000
commit | 181b4bb3bb21a576458686d8e40234f658a3d35e | (patch)
tree | a7acb48bf7338894f3c0f2c81e97564a98ccce3c | /openmp/runtime/src
parent | 701875542d670e40e61390ccac08a6ed1de4d91f | (diff)
download | bcm5719-llvm-181b4bb3bb21a576458686d8e40234f658a3d35e.tar.gz | bcm5719-llvm-181b4bb3bb21a576458686d8e40234f658a3d35e.zip
For your Christmas hacking pleasure.
This release aligns with Intel(R) Composer XE 2013 SP1 Product Update 2
New features
* The library can now be built with clang (though with some
limitations, since clang does not support 128-bit floats)
* Support for VTune analysis of load imbalance
* Code contribution from Steven Noonan to build the runtime for ARM*
architecture processors
* First implementation of the runtime API for OpenMP cancellation (a usage sketch follows these notes)
Bug Fixes
* Fixed a Windows-only hang when using KMP_BLOCKTIME=0
llvm-svn: 197914
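
The hunks below add the cancellation entry points (__kmpc_cancel, __kmpc_cancellationpoint, __kmpc_cancel_barrier) along with the user-visible omp_get_cancellation and kmp_get_cancellation_status. For orientation, here is a minimal C sketch of how user code exercises the new API. It assumes a compiler that accepts the OpenMP 4.0 cancel pragmas and that OMP_CANCELLATION=true is set in the environment; the search loop and its match value are purely illustrative. The kmp_cancel_kind_t enum and the kmp_get_cancellation_status prototype are copied from the iomp.h.var hunk in this commit so the sketch stands alone.

    /* Minimal sketch of the new cancellation API; illustrative only. */
    #include <stdio.h>
    #include <omp.h>

    /* Copied from the iomp.h.var hunk below so this file is self-contained;
       normally these come from the Intel-specific iomp.h header. */
    typedef enum kmp_cancel_kind_t {
        kmp_cancel_parallel  = 1,
        kmp_cancel_loop      = 2,
        kmp_cancel_sections  = 3,
        kmp_cancel_taskgroup = 4
    } kmp_cancel_kind_t;
    extern int kmp_get_cancellation_status(kmp_cancel_kind_t);

    int main(void)
    {
        /* Both queries report 0 unless OMP_CANCELLATION=true was set
           before the runtime initialized. */
        printf("cancellation enabled: %d\n", omp_get_cancellation());
        printf("parallel status:      %d\n",
               kmp_get_cancellation_status(kmp_cancel_parallel));

        int found = -1;
        #pragma omp parallel
        {
            #pragma omp for
            for (int i = 0; i < 1000000; i++) {
                if (i == 123456) {            /* hypothetical match */
                    found = i;
                    #pragma omp cancel for    /* request loop cancellation */
                }
                #pragma omp cancellation point for  /* other threads exit here */
            }
        }
        printf("found: %d\n", found);
        return 0;
    }

If OMP_CANCELLATION is not set, the cancel directives are no-ops and the loop runs to completion; that gate is what the new __kmp_omp_cancellation flag in kmp.h records.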
Diffstat (limited to 'openmp/runtime/src')
43 files changed, 2874 insertions, 1300 deletions
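
A recurring theme in the hunks below is the new KMP_HAVE_QUAD guard: the _Quad typedefs formerly at the top of kmp.h and kmp_atomic.h are removed, and the float16/cmplx16 atomics, the td_dummy alignment member, and the SizeQuant choice in kmp_alloc.c are all made conditional so the runtime still builds with clang, which lacks a 128-bit float type. The macro itself is defined in kmp_os.h, which is outside this diffstat selection, so the following sketch of its shape is an assumption that captures the intent rather than the verbatim header.

    /* Hedged sketch of the KMP_HAVE_QUAD feature test; the real logic
       lives in kmp_os.h and also keys off the target architecture. */
    #if defined(__INTEL_COMPILER)
    # define KMP_HAVE_QUAD 1                /* icc: _Quad is built in      */
    #elif defined(__GNUC__) && !defined(__clang__)
    typedef __float128 _Quad;               /* gcc: spell it __float128    */
    # define KMP_HAVE_QUAD 1
    #else
    # define KMP_HAVE_QUAD 0                /* clang: no 128-bit float yet */
    #endif

    #if KMP_HAVE_QUAD
    /* 128-bit routines are compiled in only when the type exists... */
    static inline _Quad __kmp_quad_add_sketch(_Quad a, _Quad b) { return a + b; }
    #else
    /* ...otherwise the code is dropped, and where layout matters a padding
       member stands in, as the kmp_taskdata td_dummy hunk below does with
       a kmp_uint32 array. */
    #endif

Everything wrapped this way in the diff (QUAD_LEGACY, CPLX128_LEG, Quad_a16_t, the fp-mixed atomics) follows the same shape: declarations and definitions are compiled out entirely when KMP_HAVE_QUAD is 0.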
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports index cfcbdeb92f5..779f1d43cd2 100644 --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -357,6 +357,9 @@ kmpc_set_defaults 224 __kmpc_fork_teams 241 __kmpc_omp_task_with_deps 242 __kmpc_omp_wait_deps 243 + __kmpc_cancel 244 + __kmpc_cancellationpoint 245 + __kmpc_cancel_barrier 246 %endif # OMP_40 %endif @@ -455,6 +458,8 @@ kmp_set_warnings_off 780 #omp_curr_proc_bind 864 omp_get_num_teams 865 omp_get_team_num 866 + omp_get_cancellation 867 + kmp_get_cancellation_status 868 %endif # OMP_40 %ifndef stub diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt index 4ddf575d1ac..9ace78fd549 100644 --- a/openmp/runtime/src/exports_so.txt +++ b/openmp/runtime/src/exports_so.txt @@ -80,4 +80,26 @@ VERSION { }; # VERSION +# sets up GCC OMP_ version dependency chain +OMP_1.0 { +}; +OMP_2.0 { +} OMP_1.0; +OMP_3.0 { +} OMP_2.0; +OMP_3.1 { +} OMP_3.0; +OMP_4.0 { +} OMP_3.1; + +# sets up GCC GOMP_ version dependency chain +GOMP_1.0 { +}; +GOMP_2.0 { +} GOMP_1.0; +GOMP_3.0 { +} GOMP_2.0; +GOMP_4.0 { +} GOMP_3.0; + # end of file # diff --git a/openmp/runtime/src/include/40/iomp.h.var b/openmp/runtime/src/include/40/iomp.h.var index 88b74f35b79..8aeb38c3bc2 100644 --- a/openmp/runtime/src/include/40/iomp.h.var +++ b/openmp/runtime/src/include/40/iomp.h.var @@ -82,6 +82,16 @@ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + /* schedule kind constants */ + typedef enum kmp_cancel_kind_t { + kmp_cancel_parallel = 1, + kmp_cancel_loop = 2, + kmp_cancel_sections = 3, + kmp_cancel_taskgroup = 4 + } kmp_cancel_kind_t; + + extern int __KAI_KMPC_CONVENTION kmp_get_cancellation_status(kmp_cancel_kind_t); + # undef __KAI_KMPC_CONVENTION /* Warning: diff --git a/openmp/runtime/src/include/40/omp.h.var b/openmp/runtime/src/include/40/omp.h.var index 38400d418b1..c6dd4cd4ee1 100644 --- a/openmp/runtime/src/include/40/omp.h.var +++ b/openmp/runtime/src/include/40/omp.h.var @@ -27,30 +27,6 @@ extern "C" { # endif -# define omp_set_num_threads ompc_set_num_threads -# define omp_set_dynamic ompc_set_dynamic -# define omp_set_nested ompc_set_nested -# define omp_set_max_active_levels ompc_set_max_active_levels -# define omp_set_schedule ompc_set_schedule -# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -# define omp_get_team_size ompc_get_team_size - - -# define kmp_set_stacksize kmpc_set_stacksize -# define kmp_set_stacksize_s kmpc_set_stacksize_s -# define kmp_set_blocktime kmpc_set_blocktime -# define kmp_set_library kmpc_set_library -# define kmp_set_defaults kmpc_set_defaults -# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc -# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc -# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc - -# define kmp_malloc kmpc_malloc -# define kmp_calloc kmpc_calloc -# define kmp_realloc kmpc_realloc -# define kmp_free kmpc_free - - # if defined(_WIN32) # define __KAI_KMPC_CONVENTION __cdecl # else @@ -120,6 +96,7 @@ extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void); extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void); extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); + extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); # include <stdlib.h> /* kmp API functions */ diff --git a/openmp/runtime/src/include/40/omp_lib.f.var b/openmp/runtime/src/include/40/omp_lib.f.var index 0adadb10260..fb9b2f28da4 
100644 --- a/openmp/runtime/src/include/40/omp_lib.f.var +++ b/openmp/runtime/src/include/40/omp_lib.f.var @@ -32,6 +32,7 @@ integer, parameter :: kmp_pointer_kind = int_ptr_kind() integer, parameter :: kmp_size_t_kind = int_ptr_kind() integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + integer, parameter :: kmp_cancel_kind = omp_integer_kind end module omp_lib_kinds @@ -56,6 +57,11 @@ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + interface ! *** @@ -199,6 +205,11 @@ integer (kind=omp_integer_kind) omp_get_team_num end function omp_get_team_num + function omp_get_cancellation() + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + subroutine omp_init_lock(lockvar) !DIR$ IF(__INTEL_COMPILER.GE.1400) !DIR$ attributes known_intrinsic :: omp_init_lock @@ -417,6 +428,11 @@ subroutine kmp_set_warnings_off() end subroutine kmp_set_warnings_off + function kmp_get_cancellation_status(cancelkind) + use omp_lib_kinds + integer (kind=kmp_cancel_kind) cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status end interface !dec$ if defined(_WIN32) @@ -459,6 +475,7 @@ !dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices !dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams !dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'omp_init_lock' :: omp_init_lock !dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock @@ -498,6 +515,8 @@ !dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on !dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off +!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + !dec$ else !*** @@ -531,6 +550,7 @@ !dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices !dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams !dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num +!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock !dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock @@ -570,6 +590,8 @@ !dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on !dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off +!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status + !dec$ endif !dec$ endif @@ -606,6 +628,7 @@ !dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices !dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams !dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation !dec$ attributes alias:'omp_init_lock_'::omp_init_lock !dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock @@ -644,6 +667,7 @@ !dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on !dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off +!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status !dec$ endif @@ 
-678,6 +702,7 @@ !dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick !dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams !dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num +!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation !dec$ attributes alias:'_omp_init_lock_'::omp_init_lock !dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock @@ -717,6 +742,8 @@ !dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on !dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off +!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status + !dec$ endif end module omp_lib diff --git a/openmp/runtime/src/include/40/omp_lib.f90.var b/openmp/runtime/src/include/40/omp_lib.f90.var index 5cac259b49e..f78535212ca 100644 --- a/openmp/runtime/src/include/40/omp_lib.f90.var +++ b/openmp/runtime/src/include/40/omp_lib.f90.var @@ -28,6 +28,7 @@ integer, parameter :: kmp_pointer_kind = c_intptr_t integer, parameter :: kmp_size_t_kind = c_size_t integer, parameter :: kmp_affinity_mask_kind = c_intptr_t + integer, parameter :: kmp_cancel_kind = omp_integer_kind end module omp_lib_kinds @@ -47,12 +48,18 @@ integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3 integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4 + integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 + integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + interface ! *** @@ -198,6 +205,11 @@ integer (kind=omp_integer_kind) omp_get_team_num end function omp_get_team_num + function omp_get_cancellation() bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_get_cancellation + end function omp_get_cancellation + subroutine omp_init_lock(lockvar) bind(c) !DIR$ IF(__INTEL_COMPILER.GE.1400) !DIR$ attributes known_intrinsic :: omp_init_lock @@ -417,6 +429,12 @@ subroutine kmp_set_warnings_off() bind(c) end subroutine kmp_set_warnings_off + function kmp_get_cancellation_status(cancelkind) bind(c) + use omp_lib_kinds + integer (kind=kmp_cancel_kind), value :: cancelkind + logical (kind=omp_logical_kind) kmp_get_cancellation_status + end function kmp_get_cancellation_status + end interface end module omp_lib diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 7117571e76d..37c7f41bdc4 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1,8 +1,8 @@ /*! \file */ /* * kmp.h -- KPTS runtime header file. 
- * $Revision: 42642 $ - * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $ + * $Revision: 42816 $ + * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $ */ @@ -26,10 +26,6 @@ */ //#define FIX_SGI_CLOCK -#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER ) -typedef __float128 _Quad; -#endif - /* Defines for OpenMP 3.0 tasking and auto scheduling */ #if OMP_30_ENABLED @@ -81,9 +77,12 @@ typedef __float128 _Quad; #include <errno.h> +#include "kmp_os.h" + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 #include <xmmintrin.h> +#endif -#include "kmp_os.h" #include "kmp_version.h" #include "kmp_debug.h" #include "kmp_lock.h" @@ -188,7 +187,7 @@ typedef struct ident { /* contextual information. */ #endif /* USE_ITT_BUILD */ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ - char *psource; /**< String describing the source location. + char const *psource; /**< String describing the source location. The string is composed of semi-colon separated fields which describe the source file, the function and a pair of line numbers that delimit the construct. */ @@ -231,6 +230,13 @@ extern "C" { /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ +#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) +#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) + +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + + /* Enumeration types */ enum kmp_state_timer { @@ -752,6 +758,16 @@ extern int __kmp_affinity_num_places; #endif /* OMP_40_ENABLED */ +#if OMP_40_ENABLED +typedef enum kmp_cancel_kind_t { + cancel_noreq = 0, + cancel_parallel = 1, + cancel_loop = 2, + cancel_sections = 3, + cancel_taskgroup = 4 +} kmp_cancel_kind_t; +#endif // OMP_40_ENABLED + #if KMP_MIC extern unsigned int __kmp_place_num_cores; extern unsigned int __kmp_place_num_threads_per_core; @@ -777,7 +793,7 @@ extern unsigned int __kmp_place_core_offset; #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() #define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ - /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : /* TODO remove this check, it is redundant */ \ + /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : */ /* TODO remove this check, it is redundant */ \ __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid ) #define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) ) @@ -1078,14 +1094,6 @@ extern kmp_key_t __kmp_tv_key; #endif /* BUILD_TV */ /* ------------------------------------------------------------------------ */ -// Some forward declarations. - -typedef union kmp_team kmp_team_t; -typedef struct kmp_taskdata kmp_taskdata_t; -typedef union kmp_task_team kmp_task_team_t; -typedef union kmp_team kmp_team_p; -typedef union kmp_info kmp_info_p; -typedef union kmp_root kmp_root_p; #if USE_ITT_BUILD // We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here. @@ -1883,8 +1891,12 @@ typedef struct kmp_task { /* GEH: Shouldn't this be aligned so void * shareds; /**< pointer to block of pointers to shared vars */ kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ kmp_int32 part_id; /**< part id for the task */ +#if OMP_40_ENABLED + kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ +#endif // OMP_40_ENABLED /* private vars */ } kmp_task_t; + /*! 
@} */ @@ -1892,6 +1904,7 @@ typedef struct kmp_task { /* GEH: Shouldn't this be aligned so #if OMP_40_ENABLED typedef struct kmp_taskgroup { kmp_uint32 count; // number of allocated and not yet complete tasks + kmp_int32 cancel_request; // request for cancellation of this taskgroup struct kmp_taskgroup *parent; // parent taskgroup } kmp_taskgroup_t; @@ -1974,7 +1987,12 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 b unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ unsigned final : 1; /* task is final(1) so execute immediately */ unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ - unsigned reserved13 : 13; /* reserved for compiler use */ +#if OMP_40_ENABLED + unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */ + unsigned reserved : 12; /* reserved for compiler use */ +#else // OMP_40_ENABLED + unsigned reserved : 13; /* reserved for compiler use */ +#endif // OMP_40_ENABLED /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ @@ -2014,7 +2032,11 @@ struct kmp_taskdata { /* aligned during dynamic kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies #endif +#if KMP_HAVE_QUAD _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t +#else + kmp_uint32 td_dummy[2]; +#endif }; // struct kmp_taskdata // Make sure padding above worked @@ -2121,6 +2143,8 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { int th_team_bt_intervals; int th_team_bt_set; + kmp_internal_control_t th_fixed_icvs; /* Initial ICVs for the thread */ + #if KMP_OS_WINDOWS || KMP_OS_LINUX kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ @@ -2142,6 +2166,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { # endif #endif #if USE_ITT_BUILD + kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ kmp_uint64 th_frame_time; /* frame timestamp */ kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */ #endif /* USE_ITT_BUILD */ @@ -2328,15 +2353,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { kmp_uint32 t_mxcsr; #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -#if KMP_BARRIER_ICV_PULL - // - // Note: Putting ICV's before the fp control info causes a very slight - // ~1% improvement for EPCC parallel on fxe256lin01 / 256 threads, but - // causes a 17% regression on fxe64lin01 / 64 threads. - // - kmp_internal_control_t t_initial_icvs; -#endif // KMP_BARRIER_ICV_PULL - #if (KMP_PERF_V106 == KMP_ON) void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; #endif @@ -2398,6 +2414,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { kmp_internal_control_t *t_control_stack_top; /* internal control stack for additional nested teams. 
for SERIALIZED teams nested 2 or more levels deep */ +#if OMP_40_ENABLED + kmp_int32 t_cancel_request; /* typed flag to store request state of cancellation */ +#endif int t_master_active;/* save on fork, restore on join */ kmp_taskq_t t_taskq; /* this team's task queue */ @@ -2479,8 +2498,6 @@ extern int __kmp_duplicate_library_ok; #if USE_ITT_BUILD extern int __kmp_forkjoin_frames; extern int __kmp_forkjoin_frames_mode; -extern FILE * __kmp_itt_csv_file; -extern kmp_str_buf_t __kmp_itt_frame_buffer; #endif extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; extern int __kmp_determ_red; @@ -2526,9 +2543,6 @@ extern int __kmp_storage_map_verbose; /* True means storage map includes pl extern int __kmp_storage_map_verbose_specified; extern kmp_cpuinfo_t __kmp_cpuinfo; -extern kmp_uint64 __kmp_cpu_frequency; - // CPU frequency, in Hz. Set by __kmp_runtime_initialize(). 0 means "is not set yet", - // ~ 0 signals an errror. extern volatile int __kmp_init_serial; extern volatile int __kmp_init_gtid; @@ -2678,13 +2692,13 @@ extern double __kmp_load_balance_interval; /* Interval for the load balan # endif /* USE_LOAD_BALANCE */ // OpenMP 3.1 - Nested num threads array -struct kmp_nested_nthreads_t { +typedef struct kmp_nested_nthreads_t { int * nth; int size; int used; -}; +} kmp_nested_nthreads_t; -extern struct kmp_nested_nthreads_t __kmp_nested_nth; +extern kmp_nested_nthreads_t __kmp_nested_nth; #if KMP_USE_ADAPTIVE_LOCKS @@ -2707,6 +2721,7 @@ extern char * __kmp_speculative_statsfile; #if OMP_40_ENABLED extern int __kmp_display_env; /* TRUE or FALSE */ extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ +extern int __kmp_omp_cancellation; /* TRUE or FALSE */ #endif /* ------------------------------------------------------------------------- */ @@ -2796,7 +2811,7 @@ extern void __kmp_warn( char const * format, ... ); extern void __kmp_set_num_threads( int new_nth, int gtid ); // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered. 
-inline kmp_info_t * __kmp_entry_thread() +static inline kmp_info_t * __kmp_entry_thread() { int gtid = __kmp_entry_gtid(); @@ -2976,11 +2991,11 @@ extern void __kmp_balanced_affinity( int tid, int team_size ); #endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */ -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) extern int __kmp_futex_determine_capable( void ); -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) extern void __kmp_gtid_set_specific( int gtid ); extern int __kmp_gtid_get_specific( void ); @@ -3067,7 +3082,7 @@ extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid ); extern int __kmp_fork_call( ident_t *loc, int gtid, int exec_master, kmp_int32 argc, microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_ARM || KMP_ARCH_X86_64) && KMP_OS_LINUX va_list *ap #else va_list ap @@ -3120,7 +3135,7 @@ extern int __kmp_execute_tasks( kmp_info_t *thread, kmp_int32 gtid, volatile km #if USE_ITT_BUILD void * itt_sync_obj, #endif /* USE_ITT_BUILD */ - int c = 0 ); + int c ); extern void __kmp_reap_task_teams( void ); extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread ); extern void __kmp_wait_to_unref_task_teams( void ); @@ -3138,6 +3153,9 @@ extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gti extern int __kmp_is_address_mapped( void *addr ); extern kmp_uint64 __kmp_hardware_timestamp(void); +#if KMP_OS_UNIX +extern int __kmp_read_from_file( char const *path, char const *format, ... ); +#endif /* ------------------------------------------------------------------------ */ // @@ -3148,7 +3166,7 @@ extern kmp_uint64 __kmp_hardware_timestamp(void); extern void __kmp_query_cpuid( kmp_cpuinfo_t *p ); -static inline void __kmp_load_mxcsr ( kmp_uint32 *p ) { _mm_setcsr( *p ); } +#define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); } extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p ); @@ -3258,8 +3276,8 @@ void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *tas #endif // OMP_30_ENABLED #if OMP_40_ENABLED -KMP_EXPORT void __kmpc_taskgroup( ident* loc, int gtid ); -KMP_EXPORT void __kmpc_end_taskgroup( ident* loc, int gtid ); +KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid ); +KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid ); KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, @@ -3270,6 +3288,13 @@ extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ); #endif +#if OMP_40_ENABLED +KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid); +KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); +#endif + /* * Lock interface routines (fast versions with gtid passed in) */ @@ -3355,6 +3380,42 @@ kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, struct private_common * kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); +// +// ompc_, kmpc_ entries moved from 
omp.h. +// +#if KMP_OS_WINDOWS +# define KMPC_CONVENTION __cdecl +#else +# define KMPC_CONVENTION +#endif + +#if OMP_30_ENABLED + +#ifndef __OMP_H +typedef enum omp_sched_t { + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 +} omp_sched_t; +typedef void * kmp_affinity_mask_t; +#endif + +KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); +KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); +KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); +KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); +KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); + +#endif // OMP_30_ENABLED + +KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); +KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); + #ifdef __cplusplus } #endif diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 0840fa3fd8d..644251da4b5 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -1,7 +1,7 @@ /* * kmp_affinity.cpp -- affinity management - * $Revision: 42613 $ - * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -1885,7 +1885,19 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val; if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field; threadInfo[num_avail][osIdIndex] = val; +#if KMP_OS_LINUX && USE_SYSFS_INFO + char path[256]; + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); + + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/core_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); continue; +#else } char s2[] = "physical id"; if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { @@ -1906,6 +1918,7 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field; threadInfo[num_avail][coreIdIndex] = val; continue; +#endif // KMP_OS_LINUX && USE_SYSFS_INFO } char s4[] = "thread id"; if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { @@ -3058,8 +3071,6 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, int setSize = 0; for (;;) { - int start, count, stride; - __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); // @@ -3090,7 +3101,7 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, "bad explicit places list"); next = scan; SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); + int count = __kmp_str_to_int(scan, *next); KMP_ASSERT(count >= 0); scan = next; @@ -3112,7 +3123,7 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, // Use a temp var in case macro is changed to evaluate // args multiple times. 
// - if (KMP_CPU_ISSET(j - stride, tempMask)) { + if (KMP_CPU_ISSET(j - 1, tempMask)) { KMP_CPU_SET(j, tempMask); setSize++; } @@ -3159,7 +3170,7 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, "bad explicit places list"); next = scan; SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); + int stride = __kmp_str_to_int(scan, *next); KMP_DEBUG_ASSERT(stride >= 0); scan = next; stride *= sign; diff --git a/openmp/runtime/src/kmp_alloc.c b/openmp/runtime/src/kmp_alloc.c index 30ab4bd3724..885754fd006 100644 --- a/openmp/runtime/src/kmp_alloc.c +++ b/openmp/runtime/src/kmp_alloc.c @@ -1,7 +1,7 @@ /* * kmp_alloc.c -- private/shared dyanmic memory allocation and management - * $Revision: 42613 $ - * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -31,7 +31,7 @@ typedef void (*bget_release_t)(void *); /* NOTE: bufsize must be a signed datatype */ #if KMP_OS_WINDOWS -# if KMP_ARCH_X86 +# if KMP_ARCH_X86 || KMP_ARCH_ARM typedef kmp_int32 bufsize; # else typedef kmp_int64 bufsize; @@ -74,7 +74,7 @@ static int bpoolv( kmp_info_t *th, void *pool); malloc() does not ensure 16 byte alignmnent */ -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || !KMP_HAVE_QUAD #define SizeQuant 8 #define AlignType double diff --git a/openmp/runtime/src/kmp_atomic.c b/openmp/runtime/src/kmp_atomic.c index 547aad550a1..3e9c82f874f 100644 --- a/openmp/runtime/src/kmp_atomic.c +++ b/openmp/runtime/src/kmp_atomic.c @@ -1,7 +1,7 @@ /* * kmp_atomic.c -- ATOMIC implementation routines - * $Revision: 42582 $ - * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -574,7 +574,7 @@ kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded ato */ #define KMP_ATOMIC_VOLATILE volatile -#if ( KMP_ARCH_X86 ) +#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }; static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }; @@ -608,7 +608,7 @@ kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded ato /* ------------------------------------------------------------------------ */ // All routines declarations looks like -// void __kmpc_atomic_RTYPE_OP( ident_t*, int*, TYPE *lhs, TYPE rhs ); +// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); // ------------------------------------------------------------------------ #define KMP_CHECK_GTID \ @@ -721,6 +721,7 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh } \ } +#if USE_CMPXCHG_FIX // 2007-06-25: // workaround for C78287 (complex(kind=4) data type) // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) @@ -751,6 +752,7 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh } \ } // end of the first part of the workaround for C78287 +#endif // USE_CMPXCHG_FIX #if KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -775,6 +777,7 @@ ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ OP_CMPXCHG(TYPE,BITS,OP) \ } +#if USE_CMPXCHG_FIX // ------------------------------------------------------------------------- // workaround for C78287 (complex(kind=4) data type) #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ @@ -783,6 +786,7 @@ ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ } // end of the 
second part of the workaround for C78287 +#endif #else // ------------------------------------------------------------------------- @@ -820,6 +824,7 @@ ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ } \ } +#if USE_CMPXCHG_FIX // ------------------------------------------------------------------------- // workaround for C78287 (complex(kind=4) data type) #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ @@ -833,6 +838,7 @@ ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) } \ } // end of the second part of the workaround for C78287 +#endif // USE_CMPXCHG_FIX #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ // Routines for ATOMIC 4-byte operands addition and subtraction @@ -1068,12 +1074,14 @@ MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __km MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min +#if KMP_HAVE_QUAD MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min #if ( KMP_ARCH_X86 ) MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16 #endif +#endif // ------------------------------------------------------------------------ // Need separate macros for .EQV. because of the need of complement (~) // OP ignored for critical sections, ^=~ used instead @@ -1135,6 +1143,7 @@ ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __km ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div +#if KMP_HAVE_QUAD // routines for _Quad type ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub @@ -1146,14 +1155,22 @@ ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __km ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 #endif +#endif // routines for complex types +#if USE_CMPXCHG_FIX // workaround for C78287 (complex(kind=4) data type) ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div // end of the workaround for C78287 +#else +ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add +ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub +ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul +ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div +#endif // USE_CMPXCHG_FIX ATOMIC_CRITICAL( cmplx8, add, 
kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub @@ -1163,6 +1180,7 @@ ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __km ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div +#if KMP_HAVE_QUAD ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul @@ -1173,6 +1191,7 @@ ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __km ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 #endif +#endif #if OMP_40_ENABLED @@ -1312,6 +1331,7 @@ ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ // routines for long double type ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev +#if KMP_HAVE_QUAD // routines for _Quad type ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev @@ -1319,6 +1339,7 @@ ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev #endif +#endif // routines for complex types ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev @@ -1327,12 +1348,14 @@ ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev #if ( KMP_ARCH_X86 ) ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev #endif +#endif #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -1405,7 +1428,7 @@ ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, K ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) - +#if KMP_HAVE_QUAD ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp @@ -1444,10 
+1467,12 @@ ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp +#endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems ==================================== +#if USE_CMPXCHG_FIX // workaround for C78287 (complex(kind=4) data type) #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ @@ -1456,6 +1481,13 @@ ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) } // end of the second part of the workaround for C78287 #else +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ +ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ + OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ + OP_CMPXCHG(TYPE,BITS,OP) \ +} +#endif // USE_CMPXCHG_FIX +#else // ------------------------------------------------------------------------ // Code for other architectures that don't handle unaligned accesses. #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ @@ -1624,7 +1656,9 @@ ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_ ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd +#endif // KMP_HAVE_QUAD // Fix for CQ220361 on Windows* OS #if ( KMP_OS_WINDOWS ) @@ -1634,11 +1668,13 @@ ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_ #endif ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd #if ( KMP_ARCH_X86 ) ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd #endif +#endif // ------------------------------------------------------------------------ @@ -1720,15 +1756,19 @@ ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // _ #endif ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr +#endif ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr #if ( KMP_ARCH_X86 ) ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr #endif +#endif // 
------------------------------------------------------------------------ @@ -2058,12 +2098,14 @@ MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __k MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt +#if KMP_HAVE_QUAD MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt #if ( KMP_ARCH_X86 ) MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_mix_a16_cpt #endif +#endif // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT @@ -2156,6 +2198,7 @@ ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt +#if KMP_HAVE_QUAD // routines for _Quad type ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt @@ -2167,6 +2210,7 @@ ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt #endif +#endif // routines for complex types @@ -2184,6 +2228,7 @@ ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt @@ -2194,6 +2239,7 @@ ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt #endif +#endif #if OMP_40_ENABLED @@ -2321,6 +2367,7 @@ ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ // routines for long double type ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev +#if KMP_HAVE_QUAD // routines for _Quad type ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev ATOMIC_CRITICAL_CPT_REV( float16, 
div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev @@ -2328,6 +2375,7 @@ ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev #endif +#endif // routines for complex types @@ -2378,12 +2426,14 @@ ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev #if ( KMP_ARCH_X86 ) ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev #endif +#endif // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} @@ -2527,7 +2577,9 @@ ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp +#endif // cmplx4 routine to return void ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp @@ -2536,11 +2588,13 @@ ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_ato ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp +#if KMP_HAVE_QUAD ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp #if ( KMP_ARCH_X86 ) ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp #endif +#endif // End of OpenMP 4.0 Capture diff --git a/openmp/runtime/src/kmp_atomic.h b/openmp/runtime/src/kmp_atomic.h index 2243ba700aa..361dce9aa79 100644 --- a/openmp/runtime/src/kmp_atomic.h +++ b/openmp/runtime/src/kmp_atomic.h @@ -1,7 +1,7 @@ /* * kmp_atomic.h - ATOMIC header file - * $Revision: 42195 $ - * $Date: 2013-03-27 16:10:35 -0500 (Wed, 27 Mar 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -30,10 +30,6 @@ // to use typedef'ed types on win. // Condition for WIN64 was modified in anticipation of 10.1 build compiler. 
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER ) -typedef __float128 _Quad; -#endif - #if defined( __cplusplus ) && ( KMP_OS_WINDOWS ) // create shortcuts for c99 complex types @@ -173,6 +169,7 @@ typedef __float128 _Quad; typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80; // complex16 + #if KMP_HAVE_QUAD struct __kmp_cmplx128_t : std::complex< _Quad > { __kmp_cmplx128_t() : std::complex< _Quad > () {} @@ -192,6 +189,7 @@ typedef __float128 _Quad; }; typedef struct __kmp_cmplx128_t kmp_cmplx128; + #endif /* KMP_HAVE_QUAD */ #ifdef _DEBUG_TEMPORARILY_UNSET_ #undef _DEBUG_TEMPORARILY_UNSET_ @@ -204,19 +202,22 @@ typedef __float128 _Quad; typedef float _Complex kmp_cmplx32; typedef double _Complex kmp_cmplx64; typedef long double _Complex kmp_cmplx80; + #if KMP_HAVE_QUAD typedef _Quad _Complex kmp_cmplx128; + #endif #endif // Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad // and kmp_cmplx128) on IA-32 architecture. The following aligned structures // are implemented to support the old alignment in 10.1, 11.0, 11.1 and // introduce the new alignment in 12.0. See CQ88405. -#if ( KMP_ARCH_X86 ) +#if KMP_ARCH_X86 && KMP_HAVE_QUAD // 4-byte aligned structures for backward compatibility. #pragma pack( push, 4 ) + struct KMP_DO_ALIGN( 4 ) Quad_a4_t { _Quad q; @@ -364,31 +365,31 @@ extern int __kmp_atomic_mode; typedef kmp_queuing_lock_t kmp_atomic_lock_t; -inline void +static inline void __kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) { __kmp_acquire_queuing_lock( lck, gtid ); } -inline int +static inline int __kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) { return __kmp_test_queuing_lock( lck, gtid ); } -inline void +static inline void __kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) { __kmp_release_queuing_lock( lck, gtid ); } -inline void +static inline void __kmp_init_atomic_lock( kmp_atomic_lock_t *lck ) { __kmp_init_queuing_lock( lck ); } -inline void +static inline void __kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck ) { __kmp_destroy_queuing_lock( lck ); @@ -498,6 +499,7 @@ void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +#if KMP_HAVE_QUAD void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); #if ( KMP_ARCH_X86 ) @@ -505,6 +507,7 @@ void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QU void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); #endif +#endif // .NEQV. 
(same as xor) void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); @@ -521,6 +524,7 @@ void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, lo void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); // _Quad type +#if KMP_HAVE_QUAD void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); @@ -532,6 +536,7 @@ void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QU void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); #endif +#endif // routines for complex types void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); @@ -545,6 +550,7 @@ void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, km void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); @@ -556,6 +562,7 @@ void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CP void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); #endif +#endif #if OMP_40_ENABLED @@ -593,14 +600,17 @@ void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, dou void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); void 
__kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); #if ( KMP_ARCH_X86 ) @@ -610,6 +620,7 @@ void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); #endif +#endif // KMP_HAVE_QUAD #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -632,6 +643,7 @@ void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lh void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) +#if KMP_HAVE_QUAD void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); @@ -670,6 +682,7 @@ void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +#endif // KMP_HAVE_QUAD // RHS=cmplx8 void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); @@ -701,7 +714,9 @@ kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc ); kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc ); long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc ); +#if KMP_HAVE_QUAD QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc ); +#endif // Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be // returned through an additional parameter #if ( KMP_OS_WINDOWS ) @@ -711,12 +726,14 @@ QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * #endif kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc ); kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc ); +#if KMP_HAVE_QUAD CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc ); #if ( KMP_ARCH_X86 ) // Routines with 16-byte arguments aligned to 16-byte boundary Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc ); kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc ); #endif +#endif // @@ -730,17 +747,20 @@ void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD void 
__kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); #if ( KMP_ARCH_X86 ) // Routines with 16-byte arguments aligned to 16-byte boundary void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); #endif - +#endif // // Below routines for atomic CAPTURE are listed @@ -830,8 +850,10 @@ kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +#if KMP_HAVE_QUAD QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +#endif // .NEQV. (same as xor) char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); @@ -847,11 +869,13 @@ long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long doubl long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +#if KMP_HAVE_QUAD // _Quad type QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +#endif // routines for complex types // Workaround for cmplx4 routines - return void; captured value is returned via the argument void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); @@ -867,6 +891,7 @@ kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx8 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +#if KMP_HAVE_QUAD CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * 
lhs, CPLX128_LEG rhs, int flag); CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); @@ -884,6 +909,7 @@ CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LE kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); #endif +#endif void __kmpc_atomic_start(void); void __kmpc_atomic_end(void); @@ -922,8 +948,10 @@ double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); +#if KMP_HAVE_QUAD QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); +#endif // Workaround for cmplx4 routines - return void; captured value is returned via the argument void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); @@ -931,6 +959,7 @@ kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_c kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); +#if KMP_HAVE_QUAD CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); #if ( KMP_ARCH_X86 ) @@ -939,6 +968,7 @@ CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); #endif +#endif // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs ); @@ -948,18 +978,22 @@ kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * l float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs ); double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs ); long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +#if KMP_HAVE_QUAD QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, 
int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +#endif // !!! TODO: check if we need a workaround here void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out ); //kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +#if KMP_HAVE_QUAD CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); #if ( KMP_ARCH_X86 ) Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); #endif +#endif // End of OpenMP 4.0 capture diff --git a/openmp/runtime/src/kmp_cancel.cpp b/openmp/runtime/src/kmp_cancel.cpp new file mode 100644 index 00000000000..e5a76d26951 --- /dev/null +++ b/openmp/runtime/src/kmp_cancel.cpp @@ -0,0 +1,282 @@ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "kmp.h" +#include "kmp_i18n.h" +#include "kmp_io.h" +#include "kmp_str.h" + +#if OMP_40_ENABLED + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread +@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) + +@return returns true if the cancellation request has been activated and the execution thread +needs to proceed to the end of the canceled region. + +Request cancellation of the binding OpenMP region. 
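+ +As an illustration only (a sketch, not the exact code any particular compiler emits; the label name is invented), a #pragma omp cancel parallel construct might lower to a guarded call such as: + + if (__kmpc_cancel(&loc, gtid, cancel_parallel)) + goto end_of_parallel; // proceed to the end of the canceled region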
+*/ +kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + + KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + // we do not have a cancellation request in this team, or we have one + // that matches the current request -> cancel + return 1 /* true */; + } + break; + } + case cancel_taskgroup: + // cancellation requests for the taskgroup construct + // are handled through the taskgroup structure + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT( task ); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + // we do not have a cancellation request in this taskgroup, or we have one + // that matches the current request -> cancel + return 1 /* true */; + } + } + else { + // TODO: what needs to happen here? + // the specification disallows cancellation w/o taskgroups, + // so abort for now + KMP_ASSERT( 0 /* false */); + } + } + break; + default: + KMP_ASSERT (0 /* false */); + } + } + + // ICV OMP_CANCELLATION=false, so we ignored this cancel request + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; +} + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread +@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) + +@return returns true if a matching cancellation request has been flagged in the RTL and the +encountering thread has to cancel. + +Cancellation point for the encountering thread.
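+ +As an illustrative sketch only (the label name is invented), a #pragma omp cancellation point sections construct might lower to: + + if (__kmpc_cancellationpoint(&loc, gtid, cancel_sections)) + goto end_of_sections; // a matching request is pending, so cancel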
+*/ +kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + + KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + if (this_team->t.t_cancel_request) { + if (cncl_kind == this_team->t.t_cancel_request) { + // the request in the team structure matches the type of + // cancellation point, so we can cancel + return 1 /* true */; + } + KMP_ASSERT( 0 /* false */); + } + else { + // we do not have a cancellation request pending, so we just + // ignore this cancellation point + return 0; + } + break; + } + case cancel_taskgroup: + // cancellation requests for the taskgroup construct + // are handled through the taskgroup structure + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT( task ); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + // return the current status of cancellation for the + // taskgroup + return !!taskgroup->cancel_request; + } + else { + // if a cancellation point is encountered by a task + // that does not belong to a taskgroup, it is OK + // to ignore it + return 0 /* false */; + } + } + break; + default: + KMP_ASSERT (0 /* false */); + } + } + + // ICV OMP_CANCELLATION=false, so we ignore the cancellation point + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; +} + +/*! +@ingroup CANCELLATION +@param loc_ref location of the original task directive +@param gtid Global thread ID of encountering thread + +@return returns true if a matching cancellation request has been flagged in the RTL and the +encountering thread has to cancel. + +Barrier with cancellation point to send threads from the barrier to the +end of the parallel region. Needs a special code pattern as documented +in the design document for the cancellation feature.
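+ +A sketch of that pattern (illustrative only; the design document is authoritative, and the label name is invented): + + if (__kmpc_cancel_barrier(&loc, gtid)) + goto end_of_parallel; // the region was canceled, skip the remaining work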
+*/ +kmp_int32 +__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { + int ret = 0 /* false */; + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + kmp_team_t *this_team = this_thr->th.th_team; + + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + // call into the standard barrier + __kmpc_barrier(loc, gtid); + + // if cancellation is active, check the cancellation flag + if (__kmp_omp_cancellation) { + // depending on which construct to cancel, check the flag and + // reset the flag + switch (this_team->t.t_cancel_request) { + case cancel_parallel: + ret = 1; + // ensure that threads have checked the flag when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // the next barrier is the fork/join barrier, which + // synchronizes the threads leaving here + break; + case cancel_loop: + case cancel_sections: + ret = 1; + // ensure that threads have checked the flag when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // synchronize the threads again to make sure we + // do not have any runaway threads that cause a race + // on the cancellation flag + __kmpc_barrier(loc, gtid); + break; + case cancel_taskgroup: + // this case should not occur + KMP_ASSERT (0 /* false */ ); + break; + case cancel_noreq: + // do nothing + break; + default: + KMP_ASSERT ( 0 /* false */); + } + } + + return ret; +} + +/*! +@ingroup CANCELLATION +@param cancel_kind Cancellation kind (parallel, loop, sections, taskgroup) + +@return returns true if a cancellation request of the given kind is currently active. + +Query the current status of cancellation requests. +Can be used to implement the following pattern: + +if (kmp_get_cancellation_status(kmp_cancel_parallel)) { + perform_cleanup(); + #pragma omp cancellation point parallel +} +*/ +int __kmp_get_cancellation_status(int cancel_kind) { + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_entry_thread(); + + switch (cancel_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + { + kmp_team_t *this_team = this_thr->th.th_team; + return this_team->t.t_cancel_request == cancel_kind; + } + case cancel_taskgroup: + { + kmp_taskdata_t* task; + kmp_taskgroup_t* taskgroup; + task = this_thr->th.th_current_task; + taskgroup = task->td_taskgroup; + return taskgroup && taskgroup->cancel_request; + } + } + } + + return 0 /* false */; +} + +#endif diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c index 8ca4612c473..17cc5347fa2 100644 --- a/openmp/runtime/src/kmp_csupport.c +++ b/openmp/runtime/src/kmp_csupport.c @@ -1,7 +1,7 @@ /* * kmp_csupport.c -- kfront linkage support for OpenMP. - * $Revision: 42642 $ - * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $ + * $Revision: 42826 $ + * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $ */ @@ -287,7 +287,7 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) VOLATILE_CAST(microtask_t) microtask, VOLATILE_CAST(launch_t) __kmp_invoke_task_func, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX &ap #else ap @@ -351,7 +351,7 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
argc, VOLATILE_CAST(microtask_t) __kmp_teams_master, VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX &ap #else ap @@ -622,28 +622,20 @@ __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) if ( __kmp_env_consistency_check ) __kmp_push_parallel( global_tid, NULL ); -#if USE_ITT_BUILD +// t_level is not available in 2.5 build, so check for OMP_30_ENABLED +#if USE_ITT_BUILD && OMP_30_ENABLED // Mark the start of the "parallel" region for VTune. Only use one of the frame notification schemes at the moment. if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) { __kmp_itt_region_forking( global_tid, 1 ); } - // Collect information only if the file was opened successfully. - if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file ) + if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) { +#if USE_ITT_NOTIFY if( this_thr->th.th_team->t.t_level == 1 ) { - kmp_uint64 fr_begin; -#if defined( __GNUC__ ) -# if !defined( __INTEL_COMPILER ) - fr_begin = __kmp_hardware_timestamp(); -# else - fr_begin = __rdtsc(); -# endif -#else - fr_begin = __rdtsc(); -#endif - this_thr->th.th_frame_time_serialized = fr_begin; + this_thr->th.th_frame_time_serialized = __itt_get_timestamp(); } +#endif } #endif /* USE_ITT_BUILD */ @@ -774,39 +766,17 @@ __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) } -#if USE_ITT_BUILD +// t_level is not available in 2.5 build, so check for OMP_30_ENABLED +#if USE_ITT_BUILD && OMP_30_ENABLED // Mark the end of the "parallel" region for VTune. Only use one of the frame notification schemes at the moment. if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG ) { + this_thr->th.th_ident = loc; __kmp_itt_region_joined( global_tid, 1 ); } - // Collect information only if the file was opened successfully. - if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file ) - { + if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) { if( this_thr->th.th_team->t.t_level == 0 ) { - ident_t * loc = this_thr->th.th_ident; - if (loc) { - // Use compiler-generated location to mark the frame: - // "<func>$omp$frame@[file:]<line>[:<col>]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - - kmp_uint64 fr_end; -#if defined( __GNUC__ ) -# if !defined( __INTEL_COMPILER ) - fr_end = __kmp_hardware_timestamp(); -# else - fr_end = __rdtsc(); -# endif -#else - fr_end = __rdtsc(); -#endif - K_DIAG( 3, ( "__kmpc_end_serialized_parallel: T#%d frame_begin = %llu, frame_end = %llu\n", - global_tid, this_thr->th.th_frame_time, fr_end ) ); - - __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n", - str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time_serialized, fr_end ); - __kmp_str_loc_free( &str_loc ); - } + __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, __itt_timestamp_none, 0, loc ); } } #endif /* USE_ITT_BUILD */ @@ -858,13 +828,15 @@ __kmpc_flush(ident_t *loc, ...) if ( ! __kmp_cpuinfo.sse2 ) { // CPU cannot execute SSE2 instructions.
} else { - #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER ) - __sync_synchronize(); - #else + #if KMP_COMPILER_ICC _mm_mfence(); - #endif // __GNUC__ + #else + __sync_synchronize(); + #endif // KMP_COMPILER_ICC }; // if #endif // KMP_MIC + #elif KMP_ARCH_ARM + // Nothing yet #else #error Unknown or unsupported architecture #endif @@ -1110,7 +1082,7 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { lck = (kmp_user_lock_p)crit; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { lck = (kmp_user_lock_p)crit; @@ -1163,7 +1135,7 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { lck = (kmp_user_lock_p)crit; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { lck = (kmp_user_lock_p)crit; @@ -1598,14 +1570,14 @@ __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } #endif else { - lck = __kmp_user_lock_allocate( user_lock, gtid ); + lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); } INIT_LOCK( lck ); __kmp_set_user_lock_location( lck, loc ); @@ -1634,7 +1606,7 @@ __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { @@ -1642,7 +1614,7 @@ __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { } #endif else { - lck = __kmp_user_lock_allocate( user_lock, gtid ); + lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); } INIT_NESTED_LOCK( lck ); @@ -1662,7 +1634,7 @@ __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; @@ -1681,7 +1653,7 @@ __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { ; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { ; @@ -1702,7 +1674,7 @@ __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** 
user_lock ) { + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { @@ -1723,7 +1695,7 @@ __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { ; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { @@ -1743,7 +1715,7 @@ __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; @@ -1773,7 +1745,7 @@ __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { @@ -1805,7 +1777,7 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) // "fast" path implemented to fix customer performance issue #if USE_ITT_BUILD __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); @@ -1817,7 +1789,7 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) lck = (kmp_user_lock_p)user_lock; #endif } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; @@ -1844,7 +1816,7 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) // "fast" path implemented to fix customer performance issue kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock; #if USE_ITT_BUILD @@ -1859,7 +1831,7 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) lck = (kmp_user_lock_p)user_lock; #endif } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { @@ -1888,7 +1860,7 @@ 
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; @@ -1926,7 +1898,7 @@ __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { lck = (kmp_user_lock_p)user_lock; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( ( __kmp_user_lock_kind == lk_futex ) && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index 1128b871d58..cb5bdac532a 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -1,7 +1,7 @@ /* * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. - * $Revision: 42624 $ - * $Date: 2013-08-27 10:53:11 -0500 (Tue, 27 Aug 2013) $ + * $Revision: 42674 $ + * $Date: 2013-09-18 11:12:49 -0500 (Wed, 18 Sep 2013) $ */ @@ -916,7 +916,8 @@ __kmp_dispatch_init( */ // save original FPCW and set precision to 64-bit, as // Windows* OS on IA-32 architecture defaults to 53-bit - unsigned int oldFpcw = _control87(0,0x30000); + unsigned int oldFpcw = _control87(0,0); + _control87(_PC_64,_MCW_PC); // 0,0x30000 #endif /* value used for comparison in solver for cross-over point */ long double target = ((long double)chunk * 2 + 1) * nproc / tc; @@ -995,7 +996,7 @@ __kmp_dispatch_init( pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk; #if KMP_OS_WINDOWS && KMP_ARCH_X86 // restore FPCW - _control87(oldFpcw,0x30000); + _control87(oldFpcw,_MCW_PC); #endif } // if } else { @@ -1836,7 +1837,7 @@ __kmp_dispatch_next( /* for storing original FPCW value for Windows* OS on IA-32 architecture 8-byte version */ unsigned int oldFpcw; - int fpcwSet = 0; + unsigned int fpcwSet = 0; #endif KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n", gtid ) ); @@ -1870,7 +1871,8 @@ __kmp_dispatch_next( FPCW and set precision to 64-bit, as Windows* OS on IA-32 architecture defaults to 53-bit */ if ( !fpcwSet ) { - oldFpcw = _control87(0,0x30000); + oldFpcw = _control87(0,0); + _control87(_PC_64,_MCW_PC); fpcwSet = 0x30000; } #endif @@ -1893,9 +1895,11 @@ __kmp_dispatch_next( } // if } // while (1) #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* restore FPCW if necessary */ - if ( oldFpcw & fpcwSet != 0 ) - _control87(oldFpcw,0x30000); + /* restore FPCW if necessary + AC: check fpcwSet flag first because oldFpcw can be uninitialized here + */ + if ( fpcwSet && ( oldFpcw & fpcwSet ) ) + _control87(oldFpcw,_MCW_PC); #endif if ( status != 0 ) { start = pr->u.p.lb; diff --git a/openmp/runtime/src/kmp_ftn_cdecl.c b/openmp/runtime/src/kmp_ftn_cdecl.c index 7079ee9b539..135a7cb7eb3 100644 --- a/openmp/runtime/src/kmp_ftn_cdecl.c +++ b/openmp/runtime/src/kmp_ftn_cdecl.c @@ -1,7 +1,7 @@ /* * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP. 
- * $Revision: 42061 $ - * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $ + * $Revision: 42757 $ + * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $ */ @@ -17,21 +17,21 @@ #include "kmp.h" +#if KMP_OS_WINDOWS +# if defined KMP_WIN_CDECL || !defined GUIDEDLL_EXPORTS +# define KMP_FTN_ENTRIES KMP_FTN_UPPER +# endif +#elif KMP_OS_UNIX +# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#endif + // Note: This string is not printed when KMP_VERSION=1. char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " -#ifdef USE_FTN_CDECL +#ifdef KMP_FTN_ENTRIES "yes"; +# define FTN_STDCALL /* no stdcall */ +# include "kmp_ftn_os.h" +# include "kmp_ftn_entry.h" #else "no"; -#endif - -#ifdef USE_FTN_CDECL - -#define FTN_STDCALL /* no stdcall */ -#define KMP_FTN_ENTRIES USE_FTN_CDECL - -#include "kmp_ftn_os.h" -#include "kmp_ftn_entry.h" - -#endif /* USE_FTN_CDECL */ - +#endif /* KMP_FTN_ENTRIES */ diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index f2c6440e988..dbbca19ac35 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1,7 +1,7 @@ /* * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP. - * $Revision: 42507 $ - * $Date: 2013-07-11 07:55:25 -0500 (Thu, 11 Jul 2013) $ + * $Revision: 42798 $ + * $Date: 2013-10-30 16:39:54 -0500 (Wed, 30 Oct 2013) $ */ @@ -356,7 +356,7 @@ FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) /* sets the requested number of threads for the next parallel region */ void FTN_STDCALL -FTN_SET_NUM_THREADS( int KMP_DEREF arg ) +xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg ) { #ifdef KMP_STUB // Nothing. @@ -368,7 +368,7 @@ FTN_SET_NUM_THREADS( int KMP_DEREF arg ) /* returns the number of threads in current team */ int FTN_STDCALL -FTN_GET_NUM_THREADS( void ) +xexpand(FTN_GET_NUM_THREADS)( void ) { #ifdef KMP_STUB return 1; @@ -379,7 +379,7 @@ FTN_GET_NUM_THREADS( void ) } int FTN_STDCALL -FTN_GET_MAX_THREADS( void ) +xexpand(FTN_GET_MAX_THREADS)( void ) { #ifdef KMP_STUB return 1; @@ -401,7 +401,7 @@ FTN_GET_MAX_THREADS( void ) } int FTN_STDCALL -FTN_GET_THREAD_NUM( void ) +xexpand(FTN_GET_THREAD_NUM)( void ) { #ifdef KMP_STUB return 0; @@ -458,7 +458,7 @@ FTN_GET_NUM_KNOWN_THREADS( void ) } int FTN_STDCALL -FTN_GET_NUM_PROCS( void ) +xexpand(FTN_GET_NUM_PROCS)( void ) { #ifdef KMP_STUB return 1; @@ -472,7 +472,7 @@ FTN_GET_NUM_PROCS( void ) } void FTN_STDCALL -FTN_SET_NESTED( int KMP_DEREF flag ) +xexpand(FTN_SET_NESTED)( int KMP_DEREF flag ) { #ifdef KMP_STUB __kmps_set_nested( KMP_DEREF flag ); @@ -487,7 +487,7 @@ FTN_SET_NESTED( int KMP_DEREF flag ) int FTN_STDCALL -FTN_GET_NESTED( void ) +xexpand(FTN_GET_NESTED)( void ) { #ifdef KMP_STUB return __kmps_get_nested(); @@ -499,7 +499,7 @@ FTN_GET_NESTED( void ) } void FTN_STDCALL -FTN_SET_DYNAMIC( int KMP_DEREF flag ) +xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag ) { #ifdef KMP_STUB __kmps_set_dynamic( KMP_DEREF flag ? 
TRUE : FALSE ); @@ -515,7 +515,7 @@ FTN_SET_DYNAMIC( int KMP_DEREF flag ) int FTN_STDCALL -FTN_GET_DYNAMIC( void ) +xexpand(FTN_GET_DYNAMIC)( void ) { #ifdef KMP_STUB return __kmps_get_dynamic(); @@ -527,7 +527,7 @@ FTN_GET_DYNAMIC( void ) } int FTN_STDCALL -FTN_IN_PARALLEL( void ) +xexpand(FTN_IN_PARALLEL)( void ) { #ifdef KMP_STUB return 0; @@ -550,7 +550,7 @@ FTN_IN_PARALLEL( void ) #if OMP_30_ENABLED void FTN_STDCALL -FTN_SET_SCHEDULE( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) +xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) { #ifdef KMP_STUB __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier ); @@ -562,7 +562,7 @@ FTN_SET_SCHEDULE( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) } void FTN_STDCALL -FTN_GET_SCHEDULE( kmp_sched_t * kind, int * modifier ) +xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier ) { #ifdef KMP_STUB __kmps_get_schedule( kind, modifier ); @@ -574,7 +574,7 @@ FTN_GET_SCHEDULE( kmp_sched_t * kind, int * modifier ) } void FTN_STDCALL -FTN_SET_MAX_ACTIVE_LEVELS( int KMP_DEREF arg ) +xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg ) { #ifdef KMP_STUB // Nothing. @@ -586,7 +586,7 @@ FTN_SET_MAX_ACTIVE_LEVELS( int KMP_DEREF arg ) } int FTN_STDCALL -FTN_GET_MAX_ACTIVE_LEVELS( void ) +xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void ) { #ifdef KMP_STUB return 0; @@ -598,7 +598,7 @@ FTN_GET_MAX_ACTIVE_LEVELS( void ) } int FTN_STDCALL -FTN_GET_ACTIVE_LEVEL( void ) +xexpand(FTN_GET_ACTIVE_LEVEL)( void ) { #ifdef KMP_STUB return 0; // returns 0 if it is called from the sequential part of the program @@ -610,7 +610,7 @@ FTN_GET_ACTIVE_LEVEL( void ) } int FTN_STDCALL -FTN_GET_LEVEL( void ) +xexpand(FTN_GET_LEVEL)( void ) { #ifdef KMP_STUB return 0; // returns 0 if it is called from the sequential part of the program @@ -622,7 +622,7 @@ FTN_GET_LEVEL( void ) } int FTN_STDCALL -FTN_GET_ANCESTOR_THREAD_NUM( int KMP_DEREF level ) +xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level ) { #ifdef KMP_STUB return ( KMP_DEREF level ) ? ( -1 ) : ( 0 ); @@ -632,7 +632,7 @@ FTN_GET_ANCESTOR_THREAD_NUM( int KMP_DEREF level ) } int FTN_STDCALL -FTN_GET_TEAM_SIZE( int KMP_DEREF level ) +xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level ) { #ifdef KMP_STUB return ( KMP_DEREF level ) ? ( -1 ) : ( 1 ); @@ -642,7 +642,7 @@ FTN_GET_TEAM_SIZE( int KMP_DEREF level ) } int FTN_STDCALL -FTN_GET_THREAD_LIMIT( void ) +xexpand(FTN_GET_THREAD_LIMIT)( void ) { #ifdef KMP_STUB return 1; // TO DO: clarify whether it returns 1 or 0? @@ -656,7 +656,7 @@ FTN_GET_THREAD_LIMIT( void ) } int FTN_STDCALL -FTN_IN_FINAL( void ) +xexpand(FTN_IN_FINAL)( void ) { #ifdef KMP_STUB return 0; // TO DO: clarify whether it returns 1 or 0? 
@@ -674,7 +674,7 @@ FTN_IN_FINAL( void ) kmp_proc_bind_t FTN_STDCALL -FTN_GET_PROC_BIND( void ) +xexpand(FTN_GET_PROC_BIND)( void ) { #ifdef KMP_STUB return __kmps_get_proc_bind(); @@ -684,7 +684,7 @@ FTN_GET_PROC_BIND( void ) } int FTN_STDCALL -FTN_GET_NUM_TEAMS( void ) +xexpand(FTN_GET_NUM_TEAMS)( void ) { #ifdef KMP_STUB return 1; @@ -723,7 +723,7 @@ FTN_GET_NUM_TEAMS( void ) } int FTN_STDCALL -FTN_GET_TEAM_NUM( void ) +xexpand(FTN_GET_TEAM_NUM)( void ) { #ifdef KMP_STUB return 0; @@ -793,7 +793,7 @@ typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; /* initialize the lock */ void FTN_STDCALL -FTN_INIT_LOCK( void **user_lock ) +xexpand(FTN_INIT_LOCK)( void **user_lock ) { #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; @@ -804,7 +804,7 @@ FTN_INIT_LOCK( void **user_lock ) /* initialize the lock */ void FTN_STDCALL -FTN_INIT_NEST_LOCK( void **user_lock ) +xexpand(FTN_INIT_NEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; @@ -814,7 +814,7 @@ FTN_INIT_NEST_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_DESTROY_LOCK( void **user_lock ) +xexpand(FTN_DESTROY_LOCK)( void **user_lock ) { #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; @@ -824,7 +824,7 @@ FTN_DESTROY_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_DESTROY_NEST_LOCK( void **user_lock ) +xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; @@ -834,7 +834,7 @@ FTN_DESTROY_NEST_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_SET_LOCK( void **user_lock ) +xexpand(FTN_SET_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -850,7 +850,7 @@ FTN_SET_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_SET_NEST_LOCK( void **user_lock ) +xexpand(FTN_SET_NEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -863,7 +863,7 @@ FTN_SET_NEST_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_UNSET_LOCK( void **user_lock ) +xexpand(FTN_UNSET_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -879,7 +879,7 @@ FTN_UNSET_LOCK( void **user_lock ) } void FTN_STDCALL -FTN_UNSET_NEST_LOCK( void **user_lock ) +xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -895,7 +895,7 @@ FTN_UNSET_NEST_LOCK( void **user_lock ) } int FTN_STDCALL -FTN_TEST_LOCK( void **user_lock ) +xexpand(FTN_TEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -912,7 +912,7 @@ FTN_TEST_LOCK( void **user_lock ) } int FTN_STDCALL -FTN_TEST_NEST_LOCK( void **user_lock ) +xexpand(FTN_TEST_NEST_LOCK)( void **user_lock ) { #ifdef KMP_STUB if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { @@ -925,7 +925,7 @@ FTN_TEST_NEST_LOCK( void **user_lock ) } double FTN_STDCALL -FTN_GET_WTIME( void ) +xexpand(FTN_GET_WTIME)( void ) { #ifdef KMP_STUB return __kmps_get_wtime(); @@ -944,7 +944,7 @@ FTN_GET_WTIME( void ) } double FTN_STDCALL -FTN_GET_WTICK( void ) +xexpand(FTN_GET_WTICK)( void ) { #ifdef KMP_STUB return __kmps_get_wtick(); @@ -1022,6 +1022,191 @@ FTN_SET_DEFAULTS( char const * str /* ------------------------------------------------------------------------ */ +#if OMP_40_ENABLED +/* returns the status of cancellation */ +int FTN_STDCALL +xexpand(FTN_GET_CANCELLATION)(void) { +#ifdef KMP_STUB + return 0 /* false */; +#else + // initialize the library if needed + if ( ! 
__kmp_init_serial ) { + __kmp_serial_initialize(); + } + return __kmp_omp_cancellation; +#endif +} + +int FTN_STDCALL +FTN_GET_CANCELLATION_STATUS(int cancel_kind) { +#ifdef KMP_STUB + return 0 /* false */; +#else + return __kmp_get_cancellation_status(cancel_kind); +#endif +} + +#endif // OMP_40_ENABLED + +// GCC compatibility (versioned symbols) +#if KMP_OS_LINUX + +/* + The following sections create function aliases (dummy symbols) for the omp_* routines. + These aliases will then be versioned according to how libgomp ``versions'' its + symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the + default version which libiomp5 uses: VERSION (defined in exports_so.txt). + If you want to see the versioned symbols for libgomp.so.1, just type: + + objdump -T /path/to/libgomp.so.1 | grep omp_ + + Example: + Step 1) Create __kmp_api_omp_set_num_threads_10_alias, + which is an alias of __kmp_api_omp_set_num_threads + Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0 + Step 2B) Set __kmp_api_omp_set_num_threads to the default version: omp_set_num_threads@@VERSION +*/ + +// OMP_1.0 aliases +xaliasify(FTN_SET_NUM_THREADS, 10); +xaliasify(FTN_GET_NUM_THREADS, 10); +xaliasify(FTN_GET_MAX_THREADS, 10); +xaliasify(FTN_GET_THREAD_NUM, 10); +xaliasify(FTN_GET_NUM_PROCS, 10); +xaliasify(FTN_IN_PARALLEL, 10); +xaliasify(FTN_SET_DYNAMIC, 10); +xaliasify(FTN_GET_DYNAMIC, 10); +xaliasify(FTN_SET_NESTED, 10); +xaliasify(FTN_GET_NESTED, 10); +xaliasify(FTN_INIT_LOCK, 10); +xaliasify(FTN_INIT_NEST_LOCK, 10); +xaliasify(FTN_DESTROY_LOCK, 10); +xaliasify(FTN_DESTROY_NEST_LOCK, 10); +xaliasify(FTN_SET_LOCK, 10); +xaliasify(FTN_SET_NEST_LOCK, 10); +xaliasify(FTN_UNSET_LOCK, 10); +xaliasify(FTN_UNSET_NEST_LOCK, 10); +xaliasify(FTN_TEST_LOCK, 10); +xaliasify(FTN_TEST_NEST_LOCK, 10); + +// OMP_2.0 aliases +xaliasify(FTN_GET_WTICK, 20); +xaliasify(FTN_GET_WTIME, 20); + +#if OMP_30_ENABLED +// OMP_3.0 aliases +xaliasify(FTN_SET_SCHEDULE, 30); +xaliasify(FTN_GET_SCHEDULE, 30); +xaliasify(FTN_GET_THREAD_LIMIT, 30); +xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_LEVEL, 30); +xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30); +xaliasify(FTN_GET_TEAM_SIZE, 30); +xaliasify(FTN_GET_ACTIVE_LEVEL, 30); +xaliasify(FTN_INIT_LOCK, 30); +xaliasify(FTN_INIT_NEST_LOCK, 30); +xaliasify(FTN_DESTROY_LOCK, 30); +xaliasify(FTN_DESTROY_NEST_LOCK, 30); +xaliasify(FTN_SET_LOCK, 30); +xaliasify(FTN_SET_NEST_LOCK, 30); +xaliasify(FTN_UNSET_LOCK, 30); +xaliasify(FTN_UNSET_NEST_LOCK, 30); +xaliasify(FTN_TEST_LOCK, 30); +xaliasify(FTN_TEST_NEST_LOCK, 30); + +// OMP_3.1 aliases +xaliasify(FTN_IN_FINAL, 31); +#endif /* OMP_30_ENABLED */ + +#if OMP_40_ENABLED +// OMP_4.0 aliases +xaliasify(FTN_GET_PROC_BIND, 40); +xaliasify(FTN_GET_NUM_TEAMS, 40); +xaliasify(FTN_GET_TEAM_NUM, 40); +xaliasify(FTN_GET_CANCELLATION, 40); +#endif /* OMP_40_ENABLED */ + +#if OMP_41_ENABLED +// OMP_4.1 aliases +#endif + +#if OMP_50_ENABLED +// OMP_5.0 aliases +#endif + +// OMP_1.0 versioned symbols +xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); +xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0"); +xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_SET_NESTED, 10, "OMP_1.0"); +xversionify(FTN_GET_NESTED, 10, "OMP_1.0");
+xversionify(FTN_INIT_LOCK, 10, "OMP_1.0"); +xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0"); +xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_TEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); + +// OMP_2.0 versioned symbols +xversionify(FTN_GET_WTICK, 20, "OMP_2.0"); +xversionify(FTN_GET_WTIME, 20, "OMP_2.0"); + +#if OMP_30_ENABLED +// OMP_3.0 versioned symbols +xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); +xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0"); +xversionify(FTN_GET_LEVEL, 30, "OMP_3.0"); +xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); +xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); + +// the lock routines have a 1.0 and 3.0 version +xversionify(FTN_INIT_LOCK, 30, "OMP_3.0"); +xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0"); +xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0"); +xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); + +// OMP_3.1 versioned symbol +xversionify(FTN_IN_FINAL, 31, "OMP_3.1"); +#endif /* OMP_30_ENABLED */ + +#if OMP_40_ENABLED +// OMP_4.0 versioned symbols +xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0"); +xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); +xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); +xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0"); +#endif /* OMP_40_ENABLED */ + +#if OMP_41_ENABLED +// OMP_4.1 versioned symbols +#endif + +#if OMP_50_ENABLED +// OMP_5.0 versioned symbols +#endif + +#endif /* KMP_OS_LINUX */ + #ifdef __cplusplus } //extern "C" #endif // __cplusplus diff --git a/openmp/runtime/src/kmp_ftn_extra.c b/openmp/runtime/src/kmp_ftn_extra.c index 6e1bb7eac03..6777e01ba98 100644 --- a/openmp/runtime/src/kmp_ftn_extra.c +++ b/openmp/runtime/src/kmp_ftn_extra.c @@ -1,7 +1,7 @@ /* * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP. - * $Revision: 42061 $ - * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $ + * $Revision: 42757 $ + * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $ */ @@ -17,21 +17,19 @@ #include "kmp.h" +#if KMP_OS_WINDOWS +# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#elif KMP_OS_UNIX +# define KMP_FTN_ENTRIES KMP_FTN_APPEND +#endif + // Note: This string is not printed when KMP_VERSION=1. 
char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " -#ifdef USE_FTN_EXTRA +#ifdef KMP_FTN_ENTRIES "yes"; +# define FTN_STDCALL /* nothing to do */ +# include "kmp_ftn_os.h" +# include "kmp_ftn_entry.h" #else "no"; -#endif - -#ifdef USE_FTN_EXTRA - -#define FTN_STDCALL /* nothing to do */ -#define KMP_FTN_ENTRIES USE_FTN_EXTRA - -#include "kmp_ftn_os.h" -#include "kmp_ftn_entry.h" - -#endif /* USE_FTN_EXTRA */ - +#endif /* KMP_FTN_ENTRIES */ diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index c52ca1e0a79..f241751c7e9 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -1,7 +1,7 @@ /* * kmp_ftn_os.h -- KPTS Fortran defines header file. - * $Revision: 42478 $ - * $Date: 2013-07-02 15:15:08 -0500 (Tue, 02 Jul 2013) $ + * $Revision: 42745 $ + * $Date: 2013-10-14 17:02:04 -0500 (Mon, 14 Oct 2013) $ */ @@ -105,6 +105,11 @@ #endif #endif +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION omp_get_cancellation + #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status +#endif + #endif /* KMP_FTN_PLAIN */ /* ------------------------------------------------------------------------ */ @@ -192,6 +197,11 @@ #endif +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION omp_get_cancellation_ + #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ +#endif + #endif /* KMP_FTN_APPEND */ /* ------------------------------------------------------------------------ */ @@ -279,6 +289,11 @@ #endif +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION + #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS +#endif + #endif /* KMP_FTN_UPPER */ /* ------------------------------------------------------------------------ */ @@ -366,7 +381,134 @@ #endif +#if OMP_40_ENABLED + #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ + #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ +#endif + #endif /* KMP_FTN_UAPPEND */ +/* ------------------------------------------------------------------ */ +/* -------------------------- GOMP API NAMES ------------------------ */ +// All GOMP_1.0 symbols +#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end +#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start +#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier +#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start +#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end +#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait +#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next +#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next +#define 
KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start +#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next +#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start +#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end +#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start +#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start +#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start +#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end +#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait +#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next +#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start +#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end +#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start +#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start + +// All GOMP_2.0 symbols +#define KMP_API_NAME_GOMP_TASK GOMP_task +#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start + +// All GOMP_3.0 symbols +#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield + +// All GOMP_4.0 symbols +// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libiomp5 +#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel +#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel +#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point +#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME 
GOMP_parallel_loop_runtime +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections +#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel +#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel +#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start +#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end +/* Target functions should be taken care of by liboffload */ +//#define KMP_API_NAME_GOMP_TARGET GOMP_target +//#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data +//#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data +//#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update +#define KMP_API_NAME_GOMP_TEAMS GOMP_teams + +#if KMP_OS_LINUX + #define xstr(x) str(x) + #define str(x) #x + + // If Linux, xexpand prepends __kmp_api_ to the real API name + #define xexpand(api_name) expand(api_name) + #define expand(api_name) __kmp_api_##api_name + + #define xaliasify(api_name,ver) aliasify(api_name,ver) + #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name)))) + + #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION") + #define versionify(api_name, version_num, version_str, default_ver) \ + __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \ + __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t") + +#else /* KMP_OS_LINUX */ + #define xstr(x) /* Nothing */ + #define str(x) /* Nothing */ + + // if Windows or Mac, xexpand does no name transformation + #define xexpand(api_name) expand(api_name) + #define expand(api_name) api_name + + #define xaliasify(api_name,ver) /* Nothing */ + #define aliasify(api_name,ver) /* Nothing */ + + #define xversionify(api_name, version_num, version_str) /* Nothing */ + #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */ + +#endif /* KMP_OS_LINUX */ + #endif /* KMP_FTN_OS_H */ diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c index db817648c86..b27b17164c3 100644 --- a/openmp/runtime/src/kmp_global.c +++ b/openmp/runtime/src/kmp_global.c @@ -1,7 +1,7 @@ /* * kmp_global.c -- KPTS global variables for runtime support library - * $Revision: 42642 $ - * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $ + * $Revision: 42816 $ + * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $ */ @@ -24,7 +24,6 @@ char __kmp_setversion_string[] = VERSION_STRING; kmp_key_t __kmp_gtid_threadprivate_key; kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized -kmp_uint64 __kmp_cpu_frequency = 0; /* ----------------------------------------------------- */ @@ -181,6 +180,7 @@ char * __kmp_speculative_statsfile = "-"; #if OMP_40_ENABLED int __kmp_display_env = FALSE; int __kmp_display_env_verbose = FALSE; +int __kmp_omp_cancellation = FALSE; #endif /* map OMP 3.0 schedule types with our internal schedule types */ @@ -266,9 +266,6 @@ int __kmp_duplicate_library_ok = 0; #if USE_ITT_BUILD int __kmp_forkjoin_frames = 1; int __kmp_forkjoin_frames_mode = 0; -FILE * __kmp_itt_csv_file; -kmp_str_buf_t __kmp_itt_frame_buffer; - #endif PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined; int __kmp_determ_red = FALSE; diff --git a/openmp/runtime/src/kmp_gsupport.c b/openmp/runtime/src/kmp_gsupport.c index 33e8fba5d66..9d8e5530cd3 
100644 --- a/openmp/runtime/src/kmp_gsupport.c +++ b/openmp/runtime/src/kmp_gsupport.c @@ -1,7 +1,7 @@ /* * kmp_gsupport.c - * $Revision: 42181 $ - * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -28,9 +28,10 @@ #define MKLOC(loc,routine) \ static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" }; +#include "kmp_ftn_os.h" void -GOMP_barrier(void) +xexpand(KMP_API_NAME_GOMP_BARRIER)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_barrier"); @@ -58,7 +59,7 @@ extern kmp_critical_name *__kmp_unnamed_critical_addr; void -GOMP_critical_start(void) +xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_critical_start"); @@ -68,7 +69,7 @@ GOMP_critical_start(void) void -GOMP_critical_end(void) +xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_critical_end"); @@ -78,7 +79,7 @@ GOMP_critical_end(void) void -GOMP_critical_name_start(void **pptr) +xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_critical_name_start"); @@ -88,7 +89,7 @@ GOMP_critical_name_start(void **pptr) void -GOMP_critical_name_end(void **pptr) +xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_critical_name_end"); @@ -104,7 +105,7 @@ GOMP_critical_name_end(void **pptr) // void -GOMP_atomic_start(void) +xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void) { int gtid = __kmp_entry_gtid(); KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); @@ -113,7 +114,7 @@ GOMP_atomic_start(void) void -GOMP_atomic_end(void) +xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void) { int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); @@ -122,7 +123,7 @@ GOMP_atomic_end(void) int -GOMP_single_start(void) +xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_single_start"); @@ -141,7 +142,7 @@ GOMP_single_start(void) void * -GOMP_single_copy_start(void) +xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { void *retval; int gtid = __kmp_entry_gtid(); @@ -176,7 +177,7 @@ GOMP_single_copy_start(void) void -GOMP_single_copy_end(void *data) +xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_single_copy_end"); @@ -196,7 +197,7 @@ GOMP_single_copy_end(void *data) void -GOMP_ordered_start(void) +xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_ordered_start"); @@ -206,7 +207,7 @@ GOMP_ordered_start(void) void -GOMP_ordered_end(void) +xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_ordered_end"); @@ -223,7 +224,7 @@ GOMP_ordered_end(void) // (IA-32 architecture) or 64-bit signed (Intel(R) 64). // -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_ARM # define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 # define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 # define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 @@ -287,7 +288,7 @@ __kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...) va_start(ap, argc); rc = __kmp_fork_call(loc, gtid, FALSE, argc, wrapper, __kmp_invoke_task_func, -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX &ap #else ap @@ -305,7 +306,7 @@ __kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...) 
void -GOMP_parallel_start(void (*task)(void *), void *data, unsigned num_threads) +xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_parallel_start"); @@ -325,7 +326,7 @@ GOMP_parallel_start(void (*task)(void *), void *data, unsigned num_threads) void -GOMP_parallel_end(void) +xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_parallel_end"); @@ -457,31 +458,31 @@ GOMP_parallel_end(void) } -LOOP_START(GOMP_loop_static_start, kmp_sch_static) -LOOP_NEXT(GOMP_loop_static_next, {}) -LOOP_START(GOMP_loop_dynamic_start, kmp_sch_dynamic_chunked) -LOOP_NEXT(GOMP_loop_dynamic_next, {}) -LOOP_START(GOMP_loop_guided_start, kmp_sch_guided_chunked) -LOOP_NEXT(GOMP_loop_guided_next, {}) -LOOP_RUNTIME_START(GOMP_loop_runtime_start, kmp_sch_runtime) -LOOP_NEXT(GOMP_loop_runtime_next, {}) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) +LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) -LOOP_START(GOMP_loop_ordered_static_start, kmp_ord_static) -LOOP_NEXT(GOMP_loop_ordered_static_next, \ +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(GOMP_loop_ordered_dynamic_start, kmp_ord_dynamic_chunked) -LOOP_NEXT(GOMP_loop_ordered_dynamic_next, \ +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(GOMP_loop_ordered_guided_start, kmp_ord_guided_chunked) -LOOP_NEXT(GOMP_loop_ordered_guided_next, \ +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_RUNTIME_START(GOMP_loop_ordered_runtime_start, kmp_ord_runtime) -LOOP_NEXT(GOMP_loop_ordered_runtime_next, \ +LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \ { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) void -GOMP_loop_end(void) +xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) { int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) @@ -493,7 +494,7 @@ GOMP_loop_end(void) void -GOMP_loop_end_nowait(void) +xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) { KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) } @@ -598,26 +599,26 @@ GOMP_loop_end_nowait(void) } -LOOP_START_ULL(GOMP_loop_ull_static_start, kmp_sch_static) -LOOP_NEXT_ULL(GOMP_loop_ull_static_next, {}) -LOOP_START_ULL(GOMP_loop_ull_dynamic_start, kmp_sch_dynamic_chunked) -LOOP_NEXT_ULL(GOMP_loop_ull_dynamic_next, {}) -LOOP_START_ULL(GOMP_loop_ull_guided_start, kmp_sch_guided_chunked) -LOOP_NEXT_ULL(GOMP_loop_ull_guided_next, {}) -LOOP_RUNTIME_START_ULL(GOMP_loop_ull_runtime_start, kmp_sch_runtime) 
-LOOP_NEXT_ULL(GOMP_loop_ull_runtime_next, {}) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) +LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) -LOOP_START_ULL(GOMP_loop_ull_ordered_static_start, kmp_ord_static) -LOOP_NEXT_ULL(GOMP_loop_ull_ordered_static_next, \ +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(GOMP_loop_ull_ordered_dynamic_start, kmp_ord_dynamic_chunked) -LOOP_NEXT_ULL(GOMP_loop_ull_ordered_dynamic_next, \ +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(GOMP_loop_ull_ordered_guided_start, kmp_ord_guided_chunked) -LOOP_NEXT_ULL(GOMP_loop_ull_ordered_guided_next, \ +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_RUNTIME_START_ULL(GOMP_loop_ull_ordered_runtime_start, kmp_ord_runtime) -LOOP_NEXT_ULL(GOMP_loop_ull_ordered_runtime_next, \ +LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \ { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) @@ -659,10 +660,10 @@ LOOP_NEXT_ULL(GOMP_loop_ull_ordered_runtime_next, \ } -PARALLEL_LOOP_START(GOMP_parallel_loop_static_start, kmp_sch_static) -PARALLEL_LOOP_START(GOMP_parallel_loop_dynamic_start, kmp_sch_dynamic_chunked) -PARALLEL_LOOP_START(GOMP_parallel_loop_guided_start, kmp_sch_guided_chunked) -PARALLEL_LOOP_START(GOMP_parallel_loop_runtime_start, kmp_sch_runtime) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), kmp_sch_static) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), kmp_sch_guided_chunked) +PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime) #if OMP_30_ENABLED @@ -674,7 +675,7 @@ PARALLEL_LOOP_START(GOMP_parallel_loop_runtime_start, kmp_sch_runtime) // void -GOMP_task(void (*func)(void *), void *data, void (*copy_func)(void *, void *), +xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *), long arg_size, long arg_align, int if_cond, unsigned gomp_flags) { MKLOC(loc, "GOMP_task"); @@ -728,7 +729,7 @@ GOMP_task(void (*func)(void *), void *data, void (*copy_func)(void *, void *), void -GOMP_taskwait(void) +xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) { MKLOC(loc, "GOMP_taskwait"); int gtid = __kmp_entry_gtid(); @@ -759,7 +760,7 @@ GOMP_taskwait(void) // unsigned -GOMP_sections_start(unsigned count) 
+xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) { int status; kmp_int lb, ub, stride; @@ -786,7 +787,7 @@ GOMP_sections_start(unsigned count) unsigned -GOMP_sections_next(void) +xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) { int status; kmp_int lb, ub, stride; @@ -811,7 +812,7 @@ GOMP_sections_next(void) void -GOMP_parallel_sections_start(void (*task) (void *), void *data, +xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data, unsigned num_threads, unsigned count) { int gtid = __kmp_entry_gtid(); @@ -839,7 +840,7 @@ GOMP_parallel_sections_start(void (*task) (void *), void *data, void -GOMP_sections_end(void) +xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) { int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) @@ -851,11 +852,175 @@ GOMP_sections_end(void) void -GOMP_sections_end_nowait(void) +xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) { KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) } +// libgomp has an empty function for GOMP_taskyield as of 2013-10-10 +void +xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void) +{ + +} + +/* + The following sections of code create aliases for the GOMP_* functions, + then create versioned symbols using the assembler directive .symver. + This is only pertinent for ELF .so library + xaliasify and xversionify are defined in kmp_ftn_os.h +*/ + +#if KMP_OS_LINUX + +// GOMP_1.0 aliases +xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10); +xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_BARRIER, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10); +xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10); +xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10); +xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10); +xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10); +xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10); 
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10); +xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10); + +// GOMP_2.0 aliases +#if OMP_30_ENABLED +xaliasify(KMP_API_NAME_GOMP_TASK, 20); +xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20); +#endif +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20); +xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20); + +// GOMP_3.0 aliases +xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30); + +// GOMP_4.0 aliases +/* TODO: add GOMP_4.0 aliases when corresponding + GOMP_* functions are implemented +*/ + +// GOMP_1.0 versioned symbols +xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0"); 
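A note on what the xaliasify/xversionify pairs in this block expand to, since the macro soup is dense: on Linux, xaliasify(api, 10) declares a GCC alias of the real __kmp_api_* entry point, and xversionify then emits two .symver directives, binding the alias to the legacy version node (api@GOMP_1.0) and the entry point itself to the library's default node (api@@VERSION). A standalone sketch of the same pattern, with hypothetical demo names and assuming the version nodes are declared in the linker version script used to build the .so:

/* One real implementation serves two versioned exports. */
void __demo_impl_barrier(void) { /* real implementation */ }

/* GCC alias, as xaliasify produces: same code, second symbol name. */
__typeof__(__demo_impl_barrier) __demo_impl_barrier_10_alias
    __attribute__((alias("__demo_impl_barrier")));

/* .symver directives, as xversionify produces: the alias becomes the
   GOMP_1.0-versioned symbol, the implementation the default (@@) one. */
__asm__(".symver __demo_impl_barrier_10_alias, GOMP_demo_barrier@GOMP_1.0");
__asm__(".symver __demo_impl_barrier, GOMP_demo_barrier@@VERSION");

On the resulting shared object, readelf --dyn-syms shows GOMP_demo_barrier twice, once @GOMP_1.0 and once @@VERSION, both backed by the same code; that is what lets binaries linked against an older GNU libgomp keep resolving against this library.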
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0"); +xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0"); + +// GOMP_2.0 versioned symbols +#if OMP_30_ENABLED +xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0"); +#endif +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0"); +xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0"); + +// GOMP_3.0 versioned symbols +xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0"); + +// GOMP_4.0 versioned symbols +/* TODO: add GOMP_4.0 versioned symbols when corresponding + GOMP_* functions are implemented +*/ + +#endif /* KMP_OS_LINUX */ + #ifdef __cplusplus } //extern "C" #endif // __cplusplus diff --git a/openmp/runtime/src/kmp_i18n.c b/openmp/runtime/src/kmp_i18n.c index e23e9f1af5d..5cca6e816d3 100644 --- a/openmp/runtime/src/kmp_i18n.c +++ b/openmp/runtime/src/kmp_i18n.c @@ -1,7 +1,7 @@ /* * kmp_i18n.c - * $Revision: 42181 $ - * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -668,7 +668,7 @@ __kmp_i18n_catgets( void __kmp_i18n_dump_catalog( - kmp_str_buf_t & buffer + kmp_str_buf_t * buffer ) { struct kmp_i18n_id_range_t { @@ -676,7 +676,7 @@ __kmp_i18n_dump_catalog( kmp_i18n_id_t last; }; // struct kmp_i18n_id_range_t - static kmp_i18n_id_range_t ranges[] = { + static struct kmp_i18n_id_range_t ranges[] = { { kmp_i18n_prp_first, kmp_i18n_prp_last }, { kmp_i18n_str_first, kmp_i18n_str_last }, { kmp_i18n_fmt_first, kmp_i18n_fmt_last }, @@ -684,18 +684,20 @@ __kmp_i18n_dump_catalog( { kmp_i18n_hnt_first, kmp_i18n_hnt_last } }; // ranges - int num_of_ranges = sizeof( ranges ) / sizeof( kmp_i18n_id_range_t ); + int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t ); int range; kmp_i18n_id_t id; for ( range = 0; range < num_of_ranges; ++ range ) { - 
__kmp_str_buf_print( & buffer, "*** Set #%d ***\n", range + 1 ); - for ( id = kmp_i18n_id_t( ranges[ range ].first + 1 ); id < ranges[ range ].last; id = kmp_i18n_id_t( id + 1 ) ) { - __kmp_str_buf_print( & buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) ); + __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 ); + for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 ); + id < ranges[ range ].last; + id = (kmp_i18n_id_t)( id + 1 ) ) { + __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) ); }; // for id }; // for range - __kmp_printf( "%s", buffer.str ); + __kmp_printf( "%s", buffer->str ); } // __kmp_i18n_dump_catalog diff --git a/openmp/runtime/src/kmp_i18n.h b/openmp/runtime/src/kmp_i18n.h index 9392e62b3d8..fea8de42648 100644 --- a/openmp/runtime/src/kmp_i18n.h +++ b/openmp/runtime/src/kmp_i18n.h @@ -1,7 +1,7 @@ /* * kmp_i18n.h - * $Revision: 42061 $ - * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -183,7 +183,7 @@ void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... ); } #ifdef KMP_DEBUG - void __kmp_i18n_dump_catalog( kmp_str_buf_t & buffer ); + void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer ); #endif // KMP_DEBUG #ifdef __cplusplus diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h index ced8fc8f871..0ee79b6fe5d 100644 --- a/openmp/runtime/src/kmp_itt.h +++ b/openmp/runtime/src/kmp_itt.h @@ -1,8 +1,8 @@ #if USE_ITT_BUILD /* * kmp_itt.h -- ITT Notify interface. - * $Revision: 42616 $ - * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $ + * $Revision: 42829 $ + * $Date: 2013-11-21 05:44:01 -0600 (Thu, 21 Nov 2013) $ */ @@ -59,6 +59,9 @@ __kmp_inline void __kmp_itt_region_forking( int gtid, int serialized = 0 ); // __kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads. // (*) Note: A thread may execute tasks after this point, though. +// --- Frame reporting --- +__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc ); + // --- Barrier reporting --- __kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 ); __kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object ); @@ -265,6 +268,6 @@ __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller); # define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) -# define USE_ITT_BUILD_ARG(x) +# define USE_ITT_BUILD_ARG(x) #endif /* USE_ITT_BUILD */ diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl index bedcca1001e..6976e7f637b 100644 --- a/openmp/runtime/src/kmp_itt.inl +++ b/openmp/runtime/src/kmp_itt.inl @@ -1,8 +1,8 @@ #if USE_ITT_BUILD /* * kmp_itt.inl -- Inline functions of ITT Notify. - * $Revision: 42616 $ - * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $ + * $Revision: 42866 $ + * $Date: 2013-12-10 15:15:58 -0600 (Tue, 10 Dec 2013) $ */ @@ -49,6 +49,20 @@ # define LINKAGE static inline #endif +// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this +// API to support user-defined synchronization primitives, but does not use ZCA; +// it would be safe to turn this off until wider support becomes available. 
+#if USE_ITT_ZCA +#ifdef __INTEL_COMPILER +# if __INTEL_COMPILER >= 1200 +# undef __itt_sync_acquired +# undef __itt_sync_releasing +# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) +# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) +# endif +#endif +#endif + /* ------------------------------------------------------------------------------------------------ Parallel region reporting. @@ -79,10 +93,6 @@ __kmp_itt_region_forking( int gtid, int serialized ) { #if USE_ITT_NOTIFY kmp_team_t * team = __kmp_team_from_gtid( gtid ); #if OMP_30_ENABLED - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid, - __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized, - (team->t.t_active_level + serialized > 1) ); if (team->t.t_active_level + serialized > 1) #endif { @@ -116,13 +126,19 @@ __kmp_itt_region_forking( int gtid, int serialized ) { str_loc.line, str_loc.col); __kmp_str_loc_free( &str_loc ); + __itt_suppress_push(__itt_suppress_memory_errors); __kmp_itt_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + __kmp_str_free( &buff ); __itt_frame_begin_v3(__kmp_itt_domains[ frm ], NULL); } } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS __itt_frame_begin_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL); } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, loc:%p\n", + gtid, loc->reserved_2 - 1, serialized, loc ); } #endif } // __kmp_itt_region_forking @@ -130,6 +146,51 @@ __kmp_itt_region_forking( int gtid, int serialized ) { // ------------------------------------------------------------------------------------------------- LINKAGE void +__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc ) { +#if USE_ITT_NOTIFY + if (loc) { + if (loc->reserved_2 == 0) { + if (__kmp_frame_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( & __kmp_frame_domain_count ); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( & __kmp_frame_domain_count ); // revert the count + return; // loc->reserved_2 is still 0 + } + // Should it be synchronized? 
See the comment in __kmp_itt_region_forking + loc->reserved_2 = frm + 1; // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$frame@[file:]<line>[:<col>]" + const char * buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); + if( imbalance ) { + buff = __kmp_str_format("%s$omp$barrier-imbalance@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + } else { + buff = __kmp_str_format("%s$omp$barrier@%s:%d", + str_loc.func, str_loc.file, str_loc.col); + } + __kmp_str_loc_free( &str_loc ); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_domains[ frm ] = __itt_domain_create( buff ); + __itt_suppress_pop(); + + __kmp_str_free( &buff ); + __itt_frame_submit_v3(__kmp_itt_domains[ frm ], NULL, begin, end ); + } + } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS + __itt_frame_submit_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL, begin, end ); + } + } + +#endif +} // __kmp_itt_frame_submit + +// ------------------------------------------------------------------------------------------------- + +LINKAGE void __kmp_itt_region_starting( int gtid ) { #if USE_ITT_NOTIFY #endif @@ -150,10 +211,6 @@ __kmp_itt_region_joined( int gtid, int serialized ) { #if USE_ITT_NOTIFY kmp_team_t * team = __kmp_team_from_gtid( gtid ); #if OMP_30_ENABLED - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid, - __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized, - (team->t.t_active_level + serialized > 1) ); if (team->t.t_active_level + serialized > 1) #endif { @@ -162,7 +219,10 @@ __kmp_itt_region_joined( int gtid, int serialized ) { } ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; if (loc && loc->reserved_2 && loc->reserved_2 <= KMP_MAX_FRAME_DOMAINS) { + KMP_ITT_DEBUG_LOCK(); __itt_frame_end_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL); + KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, loc:%p\n", + gtid, loc->reserved_2 - 1, serialized, loc ); } #endif } // __kmp_itt_region_joined @@ -577,7 +637,7 @@ __kmp_itt_critical_destroyed( kmp_user_lock_p lock ) { void __kmp_itt_single_start( int gtid ) { #if USE_ITT_NOTIFY - if ( __itt_mark_create_ptr ) { + if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) { kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) ); ident_t * loc = thr->th.th_ident; char const * src = ( loc == NULL ? 
NULL : loc->psource ); diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index d0420194688..766cf830fec 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -1,7 +1,7 @@ /* * kmp_lock.cpp -- lock-related functions - * $Revision: 42613 $ - * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -23,7 +23,7 @@ #include "kmp_lock.h" #include "kmp_io.h" -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) # include <unistd.h> # include <sys/syscall.h> // We should really include <futex.h>, but that causes compatibility problems on different @@ -398,7 +398,7 @@ __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) /* ------------------------------------------------------------------------ */ /* futex locks */ @@ -755,7 +755,7 @@ __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) __kmp_destroy_nested_futex_lock( lck ); } -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) /* ------------------------------------------------------------------------ */ @@ -2199,10 +2199,10 @@ __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) // We need a fence here, since we must ensure that no memory operations // from later in this thread float above that read. -#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER ) - __sync_synchronize(); -#else +#if KMP_COMPILER_ICC _mm_mfence(); +#else + __sync_synchronize(); #endif return res; @@ -3167,7 +3167,7 @@ void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) } break; -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) case lk_futex: { __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); @@ -3238,7 +3238,7 @@ void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) } break; -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) case lk_ticket: { __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t ); diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index bb80b5a57a3..5191cea1264 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -1,7 +1,7 @@ /* * kmp_lock.h -- lock header file - * $Revision: 42590 $ - * $Date: 2013-08-13 20:55:19 -0500 (Tue, 13 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -174,7 +174,7 @@ extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) // ---------------------------------------------------------------------------- // futex locks. futex locks are only available on Linux* OS. 
@@ -224,7 +224,7 @@ extern void __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gt extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) // ---------------------------------------------------------------------------- @@ -479,31 +479,31 @@ typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; #define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) -inline void +static inline void __kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); } -inline int +static inline int __kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); } -inline void +static inline void __kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); } -inline void +static inline void __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_init_ticket_lock( lck ); } -inline void +static inline void __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_destroy_ticket_lock( lck ); @@ -524,31 +524,31 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) typedef kmp_ticket_lock_t kmp_lock_t; -inline void +static inline void __kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) { __kmp_acquire_ticket_lock( lck, gtid ); } -inline int +static inline int __kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) { return __kmp_test_ticket_lock( lck, gtid ); } -inline void +static inline void __kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) { __kmp_release_ticket_lock( lck, gtid ); } -inline void +static inline void __kmp_init_lock( kmp_lock_t *lck ) { __kmp_init_ticket_lock( lck ); } -inline void +static inline void __kmp_destroy_lock( kmp_lock_t *lck ) { __kmp_destroy_ticket_lock( lck ); @@ -570,7 +570,7 @@ __kmp_destroy_lock( kmp_lock_t *lck ) enum kmp_lock_kind { lk_default = 0, lk_tas, -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) lk_futex, #endif lk_ticket, @@ -587,7 +587,7 @@ extern kmp_lock_kind_t __kmp_user_lock_kind; union kmp_user_lock { kmp_tas_lock_t tas; -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) kmp_futex_lock_t futex; #endif kmp_ticket_lock_t ticket; @@ -606,7 +606,7 @@ extern size_t __kmp_user_lock_size; extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); -inline kmp_int32 +static inline kmp_int32 __kmp_get_user_lock_owner( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); @@ -615,7 +615,7 @@ __kmp_get_user_lock_owner( kmp_user_lock_p lck ) extern void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) #define __kmp_acquire_user_lock_with_checks(lck,gtid) \ if (__kmp_user_lock_kind == lk_tas) { \ @@ -655,7 +655,7 @@ extern void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_ } #else -inline void +static inline void __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); @@ -665,11 +665,11 @@ 
__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) #include "kmp_i18n.h" /* AC: KMP_FATAL definition */ extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ -inline int +static inline int __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { if ( __kmp_user_lock_kind == lk_tas ) { @@ -688,7 +688,7 @@ __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) } } #else -inline int +static inline int __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); @@ -698,7 +698,7 @@ __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) extern void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); -inline void +static inline void __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); @@ -707,7 +707,7 @@ __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); -inline void +static inline void __kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); @@ -720,7 +720,7 @@ __kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) // extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); -inline void +static inline void __kmp_destroy_user_lock( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); @@ -729,7 +729,7 @@ __kmp_destroy_user_lock( kmp_user_lock_p lck ) extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); -inline void +static inline void __kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); @@ -780,7 +780,7 @@ extern void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lc } #else -inline void +static inline void __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); @@ -791,7 +791,7 @@ __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -inline int +static inline int __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { if ( __kmp_user_lock_kind == lk_tas ) { @@ -820,7 +820,7 @@ __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) } } #else -inline int +static inline int __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); @@ -830,7 +830,7 @@ __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) extern void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); -inline void +static inline void __kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); @@ -839,7 +839,7 @@ __kmp_release_nested_user_lock_with_checks( 
kmp_user_lock_p lck, kmp_int32 gtid extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); -inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) +static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); @@ -847,7 +847,7 @@ inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); -inline void +static inline void __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); @@ -875,7 +875,7 @@ extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); -inline const ident_t * +static inline const ident_t * __kmp_get_user_lock_location( kmp_user_lock_p lck ) { if ( __kmp_get_user_lock_location_ != NULL ) { @@ -888,7 +888,7 @@ __kmp_get_user_lock_location( kmp_user_lock_p lck ) extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); -inline void +static inline void __kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) { if ( __kmp_set_user_lock_location_ != NULL ) { @@ -900,7 +900,7 @@ extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); -inline void +static inline void __kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) { if ( __kmp_set_user_lock_flags_ != NULL ) { @@ -962,7 +962,7 @@ typedef struct kmp_block_of_locks kmp_block_of_locks_t; extern kmp_block_of_locks_t *__kmp_lock_blocks; extern int __kmp_num_locks_in_block; -extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags = 0 ); +extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); extern void __kmp_cleanup_user_locks(); diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 9a5d94800f4..f1676052340 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -1,7 +1,7 @@ /* * kmp_os.h -- KPTS runtime header file. 
- * $Revision: 42588 $ - * $Date: 2013-08-13 01:26:00 -0500 (Tue, 13 Aug 2013) $ + * $Revision: 42820 $ + * $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $ */ @@ -42,6 +42,24 @@ # define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE #endif +/* ------------------------- Compiler recognition ---------------------- */ +#define KMP_COMPILER_ICC 0 +#define KMP_COMPILER_GCC 0 +#define KMP_COMPILER_CLANG 0 + +#if defined( __INTEL_COMPILER ) +# undef KMP_COMPILER_ICC +# define KMP_COMPILER_ICC 1 +#elif defined( __clang__ ) +# undef KMP_COMPILER_CLANG +# define KMP_COMPILER_CLANG 1 +#elif defined( __GNUC__ ) +# undef KMP_COMPILER_GCC +# define KMP_COMPILER_GCC 1 +#else +# error Unknown compiler +#endif + /* ---------------------- Operating system recognition ------------------- */ #define KMP_OS_LINUX 0 @@ -90,28 +108,77 @@ # if defined __x86_64 # undef KMP_ARCH_X86_64 # define KMP_ARCH_X86_64 1 -# else +# elif defined __i386 # undef KMP_ARCH_X86 # define KMP_ARCH_X86 1 # endif #endif -#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64) +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define KMP_ARCH_ARMV7 1 +#endif + +#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define KMP_ARCH_ARMV6 1 +#endif + +#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define KMP_ARCH_ARMV5 1 +#endif + +#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define KMP_ARCH_ARMV4 1 +#endif + +#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define KMP_ARCH_ARMV3 1 +#endif + +#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) +# define KMP_ARCH_ARMV2 1 +#endif + +#if defined(KMP_ARCH_ARMV2) +# define KMP_ARCH_ARM 1 +#endif + +#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM) # error Unknown or unsupported architecture #endif -#if KMP_OS_WINDOWS -# if defined KMP_WIN_CDECL || !defined GUIDEDLL_EXPORTS -# define USE_FTN_CDECL KMP_FTN_UPPER +/* Check for quad-precision extension. 
*/ +#define KMP_HAVE_QUAD 0 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +# if KMP_COMPILER_ICC + /* _Quad is already defined for icc */ +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 +# elif KMP_COMPILER_CLANG + /* Clang doesn't support a software-implemented + 128-bit extended precision type yet */ + typedef long double _Quad; +# elif KMP_COMPILER_GCC + typedef __float128 _Quad; +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 # endif - -# define KMP_FTN KMP_FTN_PLAIN -# define USE_FTN_EXTRA KMP_FTN_PLAIN -# if KMP_ARCH_X86 -# if defined KMP_WIN_STDCALL || !defined GUIDEDLL_EXPORTS -# define USE_FTN_STDCALL KMP_FTN_UPPER -# endif +#else +# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC + typedef long double _Quad; +# undef KMP_HAVE_QUAD +# define KMP_HAVE_QUAD 1 # endif +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + +#if KMP_OS_WINDOWS typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; @@ -143,9 +210,6 @@ #endif /* KMP_OS_WINDOWS */ #if KMP_OS_UNIX -# define KMP_FTN KMP_FTN_PLAIN -# define USE_FTN_CDECL KMP_FTN_PLAIN -# define USE_FTN_EXTRA KMP_FTN_APPEND typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; @@ -160,7 +224,7 @@ # define KMP_UINT64_SPEC "llu" #endif /* KMP_OS_UNIX */ -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_ARM # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC @@ -199,7 +263,7 @@ typedef double kmp_real64; # define KMP_INT_SPEC KMP_INT32_SPEC # define KMP_UINT_SPEC KMP_UINT32_SPEC # define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) -# define KMP_INT_MIN ((kmp_int64)0x80000000) +# define KMP_INT_MIN ((kmp_int32)0x80000000) #endif /* KMP_I8 */ #ifdef __cplusplus @@ -248,14 +312,6 @@ typedef double kmp_real64; //------------------------------------------------------------------------- #endif // __cplusplus -#if KMP_OS_WINDOWS -# define KMP_STDCALL __stdcall -#endif - -#ifndef KMP_STDCALL -# define KMP_STDCALL /* nothing */ -#endif - #define KMP_EXPORT extern /* export declaration in guide libraries */ #if __GNUC__ == 4 @@ -336,7 +392,113 @@ enum kmp_mem_fence_type { // Synchronization primitives // -#if KMP_ASM_INTRINS +#if KMP_ASM_INTRINS && KMP_OS_WINDOWS + +#include <Windows.h> + +#pragma intrinsic(InterlockedExchangeAdd) +#pragma intrinsic(InterlockedCompareExchange) +#pragma intrinsic(InterlockedExchange) +#pragma intrinsic(InterlockedExchange64) + +// +// Using InterlockedIncrement / InterlockedDecrement causes a library loading +// ordering problem, so we use InterlockedExchangeAdd instead. 
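One detail worth keeping in mind when reading the KMP_TEST_THEN_* macros: InterlockedExchangeAdd is a fetch-and-add, returning the value the word held before the addition, which is what the "test then" naming implies and what callers such as the frame-domain counter in kmp_itt.inl above depend on. InterlockedIncrement returns the new value instead, so it would not be a drop-in substitute even without the library loading-order issue. A small illustrative snippet (not from the commit):

#include <windows.h>

volatile LONG demo_counter = 41;

void demo(void)
{
    LONG old = InterlockedExchangeAdd(&demo_counter, 1);  /* returns 41; counter is now 42 */
    LONG now = InterlockedIncrement(&demo_counter);       /* counter is now 43; returns 43 */
    (void)old; (void)now;
}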
+// +# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) +# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) +# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) +# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) +# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) +# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) +# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) ) + +# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) ) + +# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) ) +# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) ) + +inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) +{ + kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v); + return *(kmp_real32*)&tmp; +} + +// +// Routines that we still need to implement in assembly. +// +extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); + +extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); + +extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); +extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); +extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); +extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); +extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); +extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); + +//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) +//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) +# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) +# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) +//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) +//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) +# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) +# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) +//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) +//# define 
KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) +# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) +# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) +//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) +# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) + +# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) +# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) +# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) +# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) + +# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) + +# if KMP_ARCH_X86 +# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) +# else /* 64 bit pointers */ +# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) +# endif /* KMP_ARCH_X86 */ + +# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) +//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) +# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) + +# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (p), (v) ); +# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); +//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); +//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); +//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); +# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); + + +#elif (KMP_ASM_INTRINS && (KMP_OS_LINUX || KMP_OS_DARWIN)) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) /* cast p to correct type so that proper intrinsic will be used */ # define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) @@ -385,7 +547,7 @@ inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) return *(kmp_real32*)&tmp; } -static kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v) +inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v) { kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v); return *(kmp_real64*)&tmp; @@ -607,6 +769,14 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... 
); #endif +// Switches for OSS builds +#ifndef USE_SYSFS_INFO +# define USE_SYSFS_INFO 0 +#endif +#ifndef USE_CMPXCHG_FIX +# define USE_CMPXCHG_FIX 1 +#endif + // Warning levels enum kmp_warnings_level { kmp_warnings_off = 0, /* No warnings */ diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c index 7d66b9bc304..37c372bd89e 100644 --- a/openmp/runtime/src/kmp_runtime.c +++ b/openmp/runtime/src/kmp_runtime.c @@ -1,7 +1,7 @@ /* * kmp_runtime.c -- KPTS runtime support library - * $Revision: 42642 $ - * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $ + * $Revision: 42839 $ + * $Date: 2013-11-24 13:01:00 -0600 (Sun, 24 Nov 2013) $ */ @@ -88,6 +88,8 @@ char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: " #endif /* KMP_DEBUG */ +#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) + /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ @@ -472,8 +474,7 @@ __kmp_wait_sleep( kmp_info_t *this_thr, __kmp_unref_task_team( task_team, this_thr ); } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) { __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag - USE_ITT_BUILD_ARG( itt_sync_obj ) - ); + USE_ITT_BUILD_ARG( itt_sync_obj ), 0); } }; // if }; // if @@ -994,7 +995,7 @@ DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { } # endif /* KMP_OS_WINDOWS */ -#endif /* GUIDEDLL_EXPORTS +#endif /* GUIDEDLL_EXPORTS */ /* ------------------------------------------------------------------------ */ @@ -1190,10 +1191,8 @@ __kmp_linear_barrier_gather( enum barrier_type bt, register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ]; register int nproc = this_thr -> th.th_team_nproc; register int i; - register kmp_uint new_state; - /* Don't have to worry about sleep bit here or atomic since team setting */ - new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP; + register kmp_uint new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP; /* Collect all the worker team member threads. 
*/ for (i = 1; i < nproc; i++) { @@ -1341,7 +1340,7 @@ __kmp_tree_barrier_gather( enum barrier_type bt, /* Need to update the team arrived pointer if we are the master thread */ if ( nproc > 1 ) - /* New value was already computed in above loop */ + /* New value was already computed above */ team -> t.t_bar[ bt ].b_arrived = new_state; else team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP; @@ -1380,6 +1379,12 @@ __kmp_hyper_barrier_gather( enum barrier_type bt, KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] ); +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Barrier imbalance - save arrive time to the thread + if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) { + this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); + } +#endif /* * We now perform a hypercube-embedded tree gather to wait until all * of the threads have arrived, and reduce any required data @@ -1417,6 +1422,9 @@ __kmp_hyper_barrier_gather( enum barrier_type bt, /* parent threads wait for children to arrive */ + if (new_state == KMP_BARRIER_UNUSED_STATE) + new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP; + for ( child = 1, child_tid = tid + (1 << level); child < branch_factor && child_tid < num_threads; child++, child_tid += (1 << level) ) @@ -1429,10 +1437,6 @@ __kmp_hyper_barrier_gather( enum barrier_type bt, if ( child+1 < branch_factor && next_child_tid < num_threads ) KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived ); #endif /* KMP_CACHE_MANAGE */ - /* Only read this arrived flag once per thread that needs it */ - if (new_state == KMP_BARRIER_UNUSED_STATE) - new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP; - KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " "arrived(%p) == %u\n", gtid, team->t.t_id, tid, @@ -1444,6 +1448,12 @@ __kmp_hyper_barrier_gather( enum barrier_type bt, USE_ITT_BUILD_ARG (itt_sync_obj) ); +#if USE_ITT_BUILD + // Barrier imbalance - write min of the thread time and a child time to the thread. + if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) { + this_thr->th.th_bar_arrive_time = KMP_MIN( this_thr->th.th_bar_arrive_time, child_thr->th.th_bar_arrive_time ); + } +#endif if (reduce) { KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", @@ -1729,7 +1739,6 @@ __kmp_tree_barrier_release( enum barrier_type bt, /* The reverse versions seem to beat the forward versions overall */ #define KMP_REVERSE_HYPER_BAR -#ifdef KMP_REVERSE_HYPER_BAR static void __kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, @@ -1751,15 +1760,13 @@ __kmp_hyper_barrier_release( enum barrier_type bt, register kmp_uint32 offset; register kmp_uint32 level; - /* - * We now perform a hypercube-embedded tree release for all - * of the threads that have been gathered, but in the exact - * reverse order from the corresponding gather (for load balance. - */ + /* Perform a hypercube-embedded tree release for all of the threads + that have been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) + the threads are released in the reverse order of the corresponding gather, + otherwise threads are released in the same order. */ if ( ! 
KMP_MASTER_TID( tid )) { /* worker threads */ - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) ); @@ -1807,7 +1814,7 @@ __kmp_hyper_barrier_release( enum barrier_type bt, TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) ); + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) ); KMP_MB(); /* Flush all pending memory write invalidates. */ @@ -1822,6 +1829,7 @@ __kmp_hyper_barrier_release( enum barrier_type bt, num_threads = this_thr -> th.th_team_nproc; other_threads = team -> t.t_threads; +#ifdef KMP_REVERSE_HYPER_BAR /* count up to correct level for parent */ for ( level = 0, offset = 1; offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0); @@ -1831,7 +1839,14 @@ __kmp_hyper_barrier_release( enum barrier_type bt, for ( level -= branch_bits, offset >>= branch_bits; offset != 0; level -= branch_bits, offset >>= branch_bits ) +#else + /* Go down the tree, level by level */ + for ( level = 0, offset = 1; + offset < num_threads; + level += branch_bits, offset <<= branch_bits ) +#endif // KMP_REVERSE_HYPER_BAR { +#ifdef KMP_REVERSE_HYPER_BAR /* Now go in reverse order through the children, highest to lowest. Initial setting of child is conservative here. */ child = num_threads >> ((level==0)?level:level-1); @@ -1839,8 +1854,18 @@ __kmp_hyper_barrier_release( enum barrier_type bt, child_tid = tid + (child << level); child >= 1; child--, child_tid -= (1 << level) ) - { +#else + if (((tid >> level) & (branch_factor - 1)) != 0) + /* No need to go any lower than this, since this is the level + parent would be notified */ + break; + /* iterate through children on this level of the tree */ + for ( child = 1, child_tid = tid + (1 << level); + child < branch_factor && child_tid < num_threads; + child++, child_tid += (1 << level) ) +#endif // KMP_REVERSE_HYPER_BAR + { if ( child_tid >= num_threads ) continue; /* child doesn't exist so keep going */ else { register kmp_info_t *child_thr = other_threads[ child_tid ]; @@ -1848,7 +1873,11 @@ __kmp_hyper_barrier_release( enum barrier_type bt, #if KMP_CACHE_MANAGE register kmp_uint32 next_child_tid = child_tid - (1 << level); /* prefetch next thread's go count */ +#ifdef KMP_REVERSE_HYPER_BAR if ( child-1 >= 1 && next_child_tid < num_threads ) +#else + if ( child+1 < branch_factor && next_child_tid < num_threads ) +#endif // KMP_REVERSE_HYPER_BAR KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go ); #endif /* KMP_CACHE_MANAGE */ @@ -1880,154 +1909,6 @@ __kmp_hyper_barrier_release( enum barrier_type bt, gtid, team->t.t_id, tid, bt ) ); } -#else /* !KMP_REVERSE_HYPER_BAR */ - -static void -__kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, int propagate_icvs ) -{ - /* handle fork barrier workers who aren't part of a team yet */ - register kmp_team_t *team; - register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb; - register kmp_info_t **other_threads; - register kmp_uint32 num_threads; - register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ]; - register kmp_uint32 branch_factor = 1 << branch_bits; - register kmp_uint32 child; - register kmp_uint32 child_tid; - register kmp_uint32 offset; - register kmp_uint32 level; - - /* - * We now perform a hypercube-embedded tree release for all - * of the threads that have been 
gathered, but in the same order - * as the gather. - */ - - if ( ! KMP_MASTER_TID( tid )) { - /* worker threads */ - - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) ); - - /* wait for parent thread to release us */ - __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL ); - -#if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY - if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) { - // we are on a fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 ); - // cancel wait on previous parallel region... - __kmp_itt_task_starting( itt_sync_obj ); - - if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) ) - return; - - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - if ( itt_sync_obj != NULL ) - __kmp_itt_task_finished( itt_sync_obj ); // call prepare as early as possible for "new" barrier - - } else -#endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */ - // - // early exit for reaping threads releasing forkjoin barrier - // - if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) ) - return; - - // - // The worker thread may now assume that the team is valid. - // -#if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY - // libguide only code (cannot use *itt_task* routines) - if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) { - // we are on a fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - __kmp_itt_barrier_starting( gtid, itt_sync_obj ); // no need to call releasing, but we have paired calls... - } -#endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */ - team = __kmp_threads[ gtid ]-> th.th_team; - KMP_DEBUG_ASSERT( team != NULL ); - tid = __kmp_tid_from_gtid( gtid ); - - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, ( team != NULL ) ? team->t.t_id : -1, tid, - &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - } else { /* KMP_MASTER_TID(tid) */ - team = __kmp_threads[ gtid ]-> th.th_team; - KMP_DEBUG_ASSERT( team != NULL ); - - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt ) ); - } - - /* Now set up team parameters since workers have been released */ - if ( team == NULL ) { - /* handle fork barrier workers who are now part of a team */ - tid = __kmp_tid_from_gtid( gtid ); - team = __kmp_threads[ gtid ]-> th.th_team; - } - num_threads = this_thr -> th.th_team_nproc; - other_threads = team -> t.t_threads; - - /* Go down the tree, level by level */ - for ( level = 0, offset = 1; - offset < num_threads; - level += branch_bits, offset <<= branch_bits ) - { - if (((tid >> level) & (branch_factor - 1)) != 0) - /* No need to go any lower than this, since this is the level - parent would be notified */ - break; - - /* iterate through children on this level of the tree */ - for ( child = 1, child_tid = tid + (1 << level); - child < branch_factor && child_tid < num_threads; - child++, child_tid += (1 << level) ) - { - register kmp_info_t *child_thr = other_threads[ child_tid ]; - register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb; -#if KMP_CACHE_MANAGE - { - register kmp_uint32 next_child_tid = child_tid + (1 << level); - /* prefetch next thread's go count */ - if ( child+1 < branch_factor && next_child_tid < num_threads ) - KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go ); - } -#endif /* KMP_CACHE_MANAGE */ - -#if KMP_BARRIER_ICV_PUSH - if ( propagate_icvs ) { - KMP_DEBUG_ASSERT( team != NULL ); - __kmp_init_implicit_task( team->t.t_ident, - team->t.t_threads[child_tid], team, child_tid, FALSE ); - load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs); - store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs); - sync_icvs(); - } -#endif // KMP_BARRIER_ICV_PUSH - - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing " - "T#%d(%d:%u) go(%p): %u => %u\n", - gtid, team->t.t_id, tid, - __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, - child_tid, &child_bar -> b_go, child_bar -> b_go, - child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) ); - - /* release child from barrier */ - __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence ); - } - } - - KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt ) ); -} -#endif /* KMP_REVERSE_HYPER_BAR */ - - /* * Internal function to do a barrier. * If is_split is true, do a split barrier, otherwise, do a plain barrier @@ -2043,6 +1924,8 @@ __kmp_barrier( enum barrier_type bt, int gtid, int is_split, register kmp_team_t *team = this_thr -> th.th_team; register int status = 0; + ident_t * tmp_loc = __kmp_threads[ gtid ]->th.th_ident; + KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) ); @@ -2126,34 +2009,23 @@ __kmp_barrier( enum barrier_type bt, int gtid, int is_split, #endif /* OMP_30_ENABLED */ +#if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier - report frame end -#if USE_ITT_BUILD - // Collect information only if the file was opened succesfully. 
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file ) - { - ident_t * loc = this_thr->th.th_ident; - if (loc) { - // Use compiler-generated location to mark the frame: - // "<func>$omp$frame@[file:]<line>[:<col>]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - - kmp_uint64 fr_end; -#if defined( __GNUC__ ) -# if !defined( __INTEL_COMPILER ) - fr_end = __kmp_hardware_timestamp(); -# else - fr_end = __rdtsc(); -# endif -#else - fr_end = __rdtsc(); -#endif - K_DIAG( 3, ( "__kmp_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n", - gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) ); - - __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n", - str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end ); - __kmp_str_loc_free( &str_loc ); - this_thr->th.th_frame_time = fr_end; + if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) { + kmp_uint64 tmp = __itt_get_timestamp(); + switch( __kmp_forkjoin_frames_mode ) { + case 1: + __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc ); + this_thr->th.th_frame_time = tmp; + break; + case 2: + __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc ); + break; + case 3: + __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc ); + __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc ); + this_thr->th.th_frame_time = tmp; + break; } } #endif /* USE_ITT_BUILD */ @@ -2465,7 +2337,7 @@ __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, KMP_MB(); /* first, let's setup the master thread */ - master_th -> th.th_info .ds.ds_tid = 0; + master_th -> th.th_info.ds.ds_tid = 0; master_th -> th.th_team = team; master_th -> th.th_team_nproc = team -> t.t_nproc; master_th -> th.th_team_master = master_th; @@ -2514,6 +2386,17 @@ __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, static void __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration +static void +__kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, +#if OMP_30_ENABLED + kmp_internal_control_t * new_icvs, + ident_t * loc +#else + int new_set_nproc, int new_set_dynamic, int new_set_nested, + int new_set_blocktime, int new_bt_intervals, int new_bt_set +#endif // OMP_30_ENABLED + ); // forward declaration + /* most of the work for a fork */ /* return true if we really went parallel, false if serialized */ int @@ -2527,7 +2410,7 @@ __kmp_fork_call( microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX va_list * ap #else va_list ap @@ -2576,7 +2459,6 @@ __kmp_fork_call( #endif - master_th->th.th_ident = loc; #if OMP_40_ENABLED @@ -2590,7 +2472,7 @@ __kmp_fork_call( argv = (void**)parent_team->t.t_argv; for( i=argc-1; i >= 0; --i ) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX *argv++ = va_arg( *ap, void * ); #else *argv++ = va_arg( ap, void * ); @@ -2686,11 +2568,11 @@ __kmp_fork_call( /* create a serialized parallel region? */ if ( nthreads == 1 ) { /* josh todo: hypothetical question: what do we do for OS X*? 
*/ -#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) +#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) void * args[ argc ]; #else void * * args = (void**) alloca( argc * sizeof( void * ) ); -#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) */ +#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) */ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); @@ -2721,7 +2603,7 @@ __kmp_fork_call( if ( ap ) { for( i=argc-1; i >= 0; --i ) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ - #if KMP_ARCH_X86_64 && KMP_OS_LINUX + #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX *argv++ = va_arg( *ap, void * ); #else *argv++ = va_arg( ap, void * ); @@ -2741,7 +2623,7 @@ __kmp_fork_call( argv = args; for( i=argc-1; i >= 0; --i ) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ - #if KMP_ARCH_X86_64 && KMP_OS_LINUX + #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX *argv++ = va_arg( *ap, void * ); #else *argv++ = va_arg( ap, void * ); @@ -2957,7 +2839,7 @@ __kmp_fork_call( #endif /* OMP_40_ENABLED */ for( i=argc-1; i >= 0; --i ) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX *argv++ = va_arg( *ap, void * ); #else *argv++ = va_arg( ap, void * ); @@ -2977,6 +2859,18 @@ __kmp_fork_call( root -> r.r_active = TRUE; __kmp_fork_team_threads( root, team, master_th, gtid ); + __kmp_setup_icv_copy(team, nthreads +#if OMP_30_ENABLED + , &master_th->th.th_current_task->td_icvs, loc +#else + , parent_team->t.t_set_nproc[master_tid], + parent_team->t.t_set_dynamic[master_tid], + parent_team->t.t_set_nested[master_tid], + parent_team->t.t_set_blocktime[master_tid], + parent_team->t.t_set_bt_intervals[master_tid], + parent_team->t.t_set_bt_set[master_tid] +#endif /* OMP_30_ENABLED */ + ); __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); @@ -2992,23 +2886,12 @@ __kmp_fork_call( __kmp_itt_region_forking( gtid ); #endif /* USE_ITT_BUILD */ +#if USE_ITT_BUILD && USE_ITT_NOTIFY && OMP_30_ENABLED // Internal fork - report frame begin -#if USE_ITT_BUILD - // Collect information only if the file was opened succesfully. - if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file ) + if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) { - kmp_uint64 fr_begin; -#if defined( __GNUC__ ) -# if !defined( __INTEL_COMPILER ) - fr_begin = __kmp_hardware_timestamp(); -# else - fr_begin = __rdtsc(); -# endif -#else - fr_begin = __rdtsc(); -#endif if( ! 
( team->t.t_active_level > 1 ) ) { - master_th->th.th_frame_time = fr_begin; + master_th->th.th_frame_time = __itt_get_timestamp(); } } #endif /* USE_ITT_BUILD */ @@ -3134,7 +3017,10 @@ __kmp_join_call(ident_t *loc, int gtid // Either not in teams or exiting teams region // (teams is a frame and no other frames inside the teams) # endif /* OMP_40_ENABLED */ + { + master_th->th.th_ident = loc; __kmp_itt_region_joined( gtid ); + } #endif /* USE_ITT_BUILD */ #if OMP_40_ENABLED @@ -4644,6 +4530,7 @@ __kmp_register_root( int initial_thread ) root -> r.r_root_team -> t.t_threads[0] = root_thread; root -> r.r_hot_team -> t.t_threads[0] = root_thread; root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread; + root_thread -> th.th_serial_team -> t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). root -> r.r_uber_thread = root_thread; /* initialize the thread, get it ready to go */ @@ -5007,6 +4894,19 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) TCW_4( __kmp_init_monitor, 1 ); __kmp_create_monitor( & __kmp_monitor ); KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); + #if KMP_OS_WINDOWS + // AC: wait until monitor has started. This is a fix for CQ232808. + // The reason is that if the library is loaded/unloaded in a loop with small (parallel) + // work in between, then there is high probability that monitor thread started after + // the library shutdown. At shutdown it is too late to cope with the problem, because + // when the master is in DllMain (process detach) the monitor has no chances to start + // (it is blocked), and master has no means to inform the monitor that the library has gone, + // because all the memory which the monitor can access is going to be released/reset. + while ( TCR_4(__kmp_init_monitor) < 2 ) { + KMP_YIELD( TRUE ); + } + KF_TRACE( 10, ( "after monitor thread has started\n" ) ); + #endif } __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); } @@ -5049,6 +4949,7 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) 0 ); } KMP_ASSERT ( serial_team ); + serial_team -> t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). serial_team -> t.t_threads[0] = new_thr; KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", new_thr ) ); @@ -5144,76 +5045,94 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ static void -__kmp_reinitialize_team( - kmp_team_t * team, - int new_nproc, - #if OMP_30_ENABLED - kmp_internal_control_t * new_icvs, - ident_t * loc - #else - int new_set_nproc, int new_set_dynamic, int new_set_nested, - int new_set_blocktime, int new_bt_intervals, int new_bt_set - #endif // OMP_30_ENABLED -) { - int f; - #if OMP_30_ENABLED - KMP_DEBUG_ASSERT( team && new_nproc && new_icvs ); - KMP_DEBUG_ASSERT( ( ! 
TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); - team->t.t_ident = loc; - #else - KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc ); - #endif // OMP_30_ENABLED +__kmp_reinitialize_team( kmp_team_t *team, +#if OMP_30_ENABLED + kmp_internal_control_t *new_icvs, ident_t *loc +#else + int new_set_nproc, int new_set_dynamic, int new_set_nested, + int new_set_blocktime, int new_bt_intervals, int new_bt_set +#endif + ) { + KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", + team->t.t_threads[0], team ) ); +#if OMP_30_ENABLED + KMP_DEBUG_ASSERT( team && new_icvs); + KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); + team->t.t_ident = loc; +#else + KMP_DEBUG_ASSERT( team && new_set_nproc ); +#endif // OMP_30_ENABLED team->t.t_id = KMP_GEN_TEAM_ID(); -#if KMP_BARRIER_ICV_PULL - // - // Copy the ICV's to the team structure, where all of the worker threads - // can access them and make their own copies after the barrier. - // + // Copy ICVs to the master thread's implicit taskdata +#if OMP_30_ENABLED load_icvs(new_icvs); - store_icvs(&team->t.t_initial_icvs, new_icvs); - - // - // Set up the master thread's copy of the ICV's. __kmp_fork_call() - // assumes they are already set in the master thread. - // FIXME - change that code to use the team->t.t_initial_icvs copy - // and eliminate this copy. - // __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); sync_icvs(); - KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); +# else + team -> t.t_set_nproc[0] = new_set_nproc; + team -> t.t_set_dynamic[0] = new_set_dynamic; + team -> t.t_set_nested[0] = new_set_nested; + team -> t.t_set_blocktime[0] = new_set_blocktime; + team -> t.t_set_bt_intervals[0] = new_bt_intervals; + team -> t.t_set_bt_set[0] = new_bt_set; +# endif // OMP_30_ENABLED -#elif KMP_BARRIER_ICV_PUSH - // - // Set the ICV's in the master thread only. - // They will be propagated by the fork barrier. - // - __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); + KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", + team->t.t_threads[0], team ) ); +} + +static void +__kmp_setup_icv_copy(kmp_team_t * team, int new_nproc, +#if OMP_30_ENABLED + kmp_internal_control_t * new_icvs, + ident_t * loc +#else + int new_set_nproc, int new_set_dynamic, int new_set_nested, + int new_set_blocktime, int new_bt_intervals, int new_bt_set +#endif // OMP_30_ENABLED + ) +{ + int f; + +#if OMP_30_ENABLED + KMP_DEBUG_ASSERT( team && new_nproc && new_icvs ); + KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); +#else + KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc ); +#endif // OMP_30_ENABLED + + // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team. + // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called. +#if KMP_BARRIER_ICV_PULL + // Copy the ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where all of the + // worker threads can access them and make their own copies after the barrier. 
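    /* In short, the three propagation schemes handled below are:
       KMP_BARRIER_ICV_PULL - the master publishes the ICVs once (into
                              th_fixed_icvs, stored just below) and each
                              worker copies them for itself in
                              __kmp_fork_barrier, so this function does
                              O(1) work;
       KMP_BARRIER_ICV_PUSH - the ICVs travel with the fork-barrier
                              release, so nothing is done here;
       neither              - the master writes every worker's
                              implicit-task ICVs itself, the O(nthreads)
                              loop in the final branch. */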
load_icvs(new_icvs); - store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); + KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point + store_icvs(&team->t.t_threads[0]->th.th_fixed_icvs, new_icvs); sync_icvs(); + KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team)); - KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); +#elif KMP_BARRIER_ICV_PUSH + // The ICVs will be propagated in the fork barrier, so nothing needs to be done here. + KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team)); #else - // - // Copy the icvs to each of the threads. This takes O(nthreads) time. - // -#if OMP_30_ENABLED + // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time. +# if OMP_30_ENABLED load_icvs(new_icvs); -#endif - for( f=0 ; f<new_nproc ; f++) { +# endif // OMP_30_ENABLED + KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point + for(f=1 ; f<new_nproc ; f++) { // skip the master thread # if OMP_30_ENABLED // TODO: GEH - pass in better source location info since usually NULL here - KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n", + KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", f, team->t.t_threads[f], team ) ); __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE ); store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); - KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n", + KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", f, team->t.t_threads[f], team ) ); # else team -> t.t_set_nproc[f] = new_set_nproc; @@ -5226,9 +5145,8 @@ __kmp_reinitialize_team( } # if OMP_30_ENABLED sync_icvs(); -# endif -#endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL - +# endif // OMP_30_ENABLED +#endif // KMP_BARRIER_ICV_PULL } /* initialize the team data structure @@ -5246,6 +5164,8 @@ __kmp_initialize_team( int new_set_blocktime, int new_bt_intervals, int new_bt_set #endif // OMP_30_ENABLED ) { + KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); + /* verify */ KMP_DEBUG_ASSERT( team ); KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); @@ -5290,18 +5210,18 @@ __kmp_initialize_team( team -> t.t_control_stack_top = NULL; - __kmp_reinitialize_team( - team, new_nproc, - #if OMP_30_ENABLED - new_icvs, - loc - #else - new_set_nproc, new_set_dynamic, new_set_nested, - new_set_blocktime, new_bt_intervals, new_bt_set - #endif // OMP_30_ENABLED - ); + __kmp_reinitialize_team( team, +#if OMP_30_ENABLED + new_icvs, loc +#else + new_set_nproc, new_set_dynamic, new_set_nested, + new_set_blocktime, new_bt_intervals, new_bt_set +#endif // OMP_30_ENABLED + ); + KMP_MB(); + KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); } #if KMP_OS_LINUX @@ -5700,15 +5620,15 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, // TODO???: team -> t.t_max_active_levels = new_max_active_levels; team -> t.t_sched = new_icvs->sched; #endif - __kmp_reinitialize_team( team, new_nproc, + __kmp_reinitialize_team( team, #if OMP_30_ENABLED - new_icvs, - root->r.r_uber_thread->th.th_ident + new_icvs, root->r.r_uber_thread->th.th_ident #else - new_set_nproc, new_set_dynamic, new_set_nested, - new_set_blocktime, new_bt_intervals, new_bt_set -#endif - ); + new_set_nproc, 
new_set_dynamic, new_set_nested, + new_set_blocktime, new_bt_intervals, new_bt_set +#endif // OMP_30_ENABLED + ); + #if OMP_30_ENABLED if ( __kmp_tasking_mode != tskm_immediate_exec ) { @@ -5768,15 +5688,14 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, if(team -> t.t_max_nproc < new_nproc) { /* reallocate larger arrays */ __kmp_reallocate_team_arrays(team, new_nproc); - __kmp_reinitialize_team( team, new_nproc, + __kmp_reinitialize_team( team, #if OMP_30_ENABLED - new_icvs, - NULL // TODO: !!! + new_icvs, NULL #else - new_set_nproc, new_set_dynamic, new_set_nested, - new_set_blocktime, new_bt_intervals, new_bt_set -#endif - ); + new_set_nproc, new_set_dynamic, new_set_nested, + new_set_blocktime, new_bt_intervals, new_bt_set +#endif // OMP_30_ENABLED + ); } #if KMP_OS_LINUX @@ -5859,8 +5778,8 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, # endif #endif - } - else { + } + else { KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); #if KMP_MIC // This case can mean that omp_set_num_threads() was called and the hot team size @@ -5877,15 +5796,14 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, team -> t.t_sched = new_icvs->sched; #endif - __kmp_reinitialize_team( team, new_nproc, + __kmp_reinitialize_team( team, #if OMP_30_ENABLED - new_icvs, - root->r.r_uber_thread->th.th_ident + new_icvs, root->r.r_uber_thread->th.th_ident #else - new_set_nproc, new_set_dynamic, new_set_nested, - new_set_blocktime, new_bt_intervals, new_bt_set -#endif - ); + new_set_nproc, new_set_dynamic, new_set_nested, + new_set_blocktime, new_bt_intervals, new_bt_set +#endif // OMP_30_ENABLED + ); #if OMP_30_ENABLED KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", @@ -6000,6 +5918,8 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, * up seems to really hurt performance a lot on the P4, so, let's not use * this... */ __kmp_allocate_team_arrays( team, max_nproc ); + + KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); __kmp_initialize_team( team, new_nproc, #if OMP_30_ENABLED new_icvs, @@ -6293,7 +6213,6 @@ __kmp_join_barrier( int gtid ) KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid )); - #if OMP_30_ENABLED if ( __kmp_tasking_mode == tskm_extra_barrier ) { __kmp_tasking_barrier( team, this_thr, gtid ); @@ -6329,25 +6248,6 @@ __kmp_join_barrier( int gtid ) #endif // OMP_30_ENABLED } - #if KMP_OS_WINDOWS - // AC: wait here until monitor has started. This is a fix for CQ232808. - // The reason is that if the library is loaded/unloaded in a loop with small (parallel) - // work in between, then there is high probability that monitor thread started after - // the library shutdown. At shutdown it is too late to cope with the problem, because - // when the master is in DllMain (process detach) the monitor has no chances to start - // (it is blocked), and master has no means to inform the monitor that the library has gone, - // because all the memory which the monitor can access is going to be released/reset. - // - // The moment before barrier_gather sounds appropriate, because master needs to - // wait for all workers anyway, and we want this to happen as late as possible, - // but before the shutdown which may happen after the barrier. 
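    /* (The wait being removed here did not disappear: it moved to
       __kmp_allocate_thread, immediately after the monitor thread is
       created, so the startup check is paid once at thread creation
       rather than at every join barrier.) */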
- if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) { - __kmp_wait_sleep( this_thr, (volatile kmp_uint32*)&__kmp_init_monitor, 2, 0 - USE_ITT_BUILD_ARG( itt_sync_obj ) - ); - } - #endif - #if USE_ITT_BUILD if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) __kmp_itt_barrier_starting( gtid, itt_sync_obj ); @@ -6390,34 +6290,22 @@ __kmp_join_barrier( int gtid ) USE_ITT_BUILD_ARG( itt_sync_obj ) ); } +#if USE_ITT_BUILD && USE_ITT_NOTIFY // Join barrier - report frame end -#if USE_ITT_BUILD - // Collect information only if the file was opened successfully. - if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file ) - { - ident_t * loc = this_thr->th.th_ident; - if (loc) { - // Use compiler-generated location to mark the frame: - // "<func>$omp$frame@[file:]<line>[:<col>]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - - kmp_uint64 fr_end; -#if defined( __GNUC__ ) -# if !defined( __INTEL_COMPILER ) - fr_end = __kmp_hardware_timestamp(); -# else - fr_end = __rdtsc(); -# endif -#else - fr_end = __rdtsc(); -#endif - K_DIAG( 3, ( "__kmp_join_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n", - gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) ); - - __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n", - str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end ); - - __kmp_str_loc_free( &str_loc ); + if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) { + kmp_uint64 tmp = __itt_get_timestamp(); + ident_t * loc = team->t.t_ident; + switch( __kmp_forkjoin_frames_mode ) { + case 1: + __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc ); + break; + case 2: + __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc ); + break; + case 3: + __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc ); + __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc ); + break; } } #endif /* USE_ITT_BUILD */ @@ -6571,20 +6459,16 @@ __kmp_fork_barrier( int gtid, int tid ) #if OMP_30_ENABLED # if KMP_BARRIER_ICV_PULL - // - // FIXME - after __kmp_fork_call() is modified to not look at the - // master thread's implicit task ICV's, remove the ! KMP_MASTER_TID - // restriction from this if condition. - // - if (! KMP_MASTER_TID( tid ) ) { - // - // Copy the initial ICV's from the team struct to the implicit task - // for this tid. - // - __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], - team, tid, FALSE ); - load_icvs(&team->t.t_initial_icvs); - store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_initial_icvs); + // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team. + // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called. + // We cannot modify __kmp_fork_call() to look at the fixed ICVs in the master's thread struct, because it is + // not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed. + if (! KMP_MASTER_TID( tid ) ) { // master thread already has ICVs + // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. 
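    /* (Inferred from usage, not defined in this patch: load_icvs
       snapshots the source ICV block, store_icvs copies that snapshot
       into the destination, and sync_icvs publishes the stores, e.g. via
       a memory fence, before the thread continues.) */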
+ KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid )); + load_icvs(&team->t.t_threads[0]->th.th_fixed_icvs); + __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE ); + store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_threads[0]->th.th_fixed_icvs); sync_icvs(); } # endif // KMP_BARRIER_ICV_PULL @@ -6716,13 +6600,13 @@ __kmp_launch_thread( kmp_info_t *this_thr ) void __kmp_internal_end_dest( void *specific_gtid ) { - #ifdef __INTEL_COMPILER + #if KMP_COMPILER_ICC #pragma warning( push ) #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits #endif // Make sure no significant bits are lost int gtid = (kmp_intptr_t)specific_gtid - 1; - #ifdef __INTEL_COMPILER + #if KMP_COMPILER_ICC #pragma warning( pop ) #endif @@ -7503,7 +7387,6 @@ __kmp_do_serial_initialize( void ) __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; } __kmp_max_nth = __kmp_sys_max_nth; - __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; @@ -7572,18 +7455,17 @@ __kmp_do_serial_initialize( void ) if ( __kmp_str_match_true( val ) ) { kmp_str_buf_t buffer; __kmp_str_buf_init( & buffer ); - __kmp_i18n_dump_catalog( buffer ); + __kmp_i18n_dump_catalog( & buffer ); __kmp_printf( "%s", buffer.str ); __kmp_str_buf_free( & buffer ); }; // if __kmp_env_free( & val ); #endif + __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); - // omalyshe: This initialisation beats env var setting. - //__kmp_load_balance_interval = 1.0; // If the library is shut down properly, both pools must be NULL. Just in case, set them // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed. 
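The hunk that follows removes the old CSV-based frame reporting from __kmp_parallel_initialize; with the __kmp_itt_frame_submit calls added in __kmp_barrier and __kmp_join_barrier above, reporting reduces to a small dispatch on __kmp_forkjoin_frames_mode. A condensed sketch of that dispatch (it compiles only inside the runtime, where these internal names are declared; `now` stands for __itt_get_timestamp()): mode 1 submits the fork-to-join region starting at th_frame_time, mode 2 submits the barrier-imbalance window starting at th_bar_arrive_time (which the gather code above reduced to the minimum over all children with KMP_MIN), and mode 3 submits both. (__kmp_barrier additionally refreshes th_frame_time in modes 1 and 3.)

static void
frame_report_sketch( int gtid, kmp_info_t *thr, kmp_uint64 now, ident_t *loc )
{
    switch ( __kmp_forkjoin_frames_mode ) {
    case 1:  /* whole fork/join region */
        __kmp_itt_frame_submit( gtid, thr->th.th_frame_time, now, 0, loc );
        break;
    case 2:  /* barrier imbalance: earliest arrival .. barrier end */
        __kmp_itt_frame_submit( gtid, thr->th.th_bar_arrive_time, now, 1, loc );
        break;
    case 3:  /* both frames */
        __kmp_itt_frame_submit( gtid, thr->th.th_frame_time, now, 0, loc );
        __kmp_itt_frame_submit( gtid, thr->th.th_bar_arrive_time, now, 1, loc );
        break;
    }
}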
@@ -7876,38 +7758,6 @@ __kmp_parallel_initialize( void ) __kmp_print_version_2(); } -#if USE_ITT_BUILD - // Create CSV file to report frames - if( __kmp_forkjoin_frames_mode == 1 ) - { - // Open CSV file to write itt frame information - const char * csv_file; -/* Internal AXE variables - char * host_name = __kmp_env_get("INTEL_MRTE_HOST_NAME"); - char * out_dir = __kmp_env_get("INTEL_MRTE_DATA_DIR");*/ - char * host_name = __kmp_env_get("AMPLXE_HOSTNAME"); - char * out_dir = __kmp_env_get("AMPLXE_DATA_DIR"); - - if( out_dir && host_name ) { - csv_file = __kmp_str_format( "%s/omp-frames-hostname-%s.csv", out_dir, host_name ); - __kmp_itt_csv_file = fopen( csv_file, "w" ); - __kmp_str_free( &csv_file ); - } else { -#ifdef KMP_DEBUG - // Create CSV file in the current dir - csv_file = __kmp_str_format( "./omp-frames-hostname-xxx.csv" ); - __kmp_itt_csv_file = fopen( csv_file, "w" ); - __kmp_str_free( &csv_file ); -#endif - } - if( __kmp_itt_csv_file ) { - __kmp_str_buf_init( & __kmp_itt_frame_buffer ); - __kmp_str_buf_print( & __kmp_itt_frame_buffer, "name,start_tsc.TSC,end_tsc,pid,tid\n" ); - } - } - -#endif /* USE_ITT_BUILD */ - /* we have finished parallel initialization */ TCW_SYNC_4(__kmp_init_parallel, TRUE); @@ -8347,16 +8197,6 @@ __kmp_cleanup( void ) __kmp_i18n_catclose(); -#if USE_ITT_BUILD - // Close CSV file for frames - if( __kmp_forkjoin_frames_mode && __kmp_itt_csv_file ) { - fprintf( __kmp_itt_csv_file, __kmp_itt_frame_buffer.str ); - - __kmp_str_buf_free( & __kmp_itt_frame_buffer ); - fclose( __kmp_itt_csv_file ); - } -#endif /* USE_ITT_BUILD */ - KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); } @@ -8576,14 +8416,6 @@ __kmp_aux_set_defaults( * internal fast reduction routines */ -// implementation rev. 0.4 -// AT: determine CPU, and always use 'critical method' if non-Intel -// AT: test loc != NULL -// AT: what to return if lck == NULL -// AT: tune the cut-off point for atomic reduce method -// AT: tune what to return depending on the CPU and platform configuration -// AT: tune what to return depending on team size -// AT: move this function out to kmp_csupport.c PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), @@ -8641,22 +8473,10 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, #error "Unknown or unsupported OS" #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN - #elif KMP_ARCH_X86 + #elif KMP_ARCH_X86 || KMP_ARCH_ARM #if KMP_OS_LINUX || KMP_OS_WINDOWS - // similar to win_32 - // 4x1x2 fxqlin04, the 'linear,linear' barrier - - // similar to lin_32 - // 4x1x2 fxqwin04, the 'linear,linear' barrier - - // actual measurement shows that the critical section method is better if team_size <= 8; - // what happenes when team_size > 8 ? 
( no machine to test ) - - // TO DO: need to run a 32-bit code on Intel(R) 64 - // TO DO: test the 'hyper,hyper,1,1' barrier - // basic tuning if( atomic_available ) { @@ -8667,7 +8487,6 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, #elif KMP_OS_DARWIN - if( atomic_available && ( num_vars <= 3 ) ) { retval = atomic_reduce_block; } else if( tree_available ) { @@ -8686,18 +8505,6 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, } - //AT: TO DO: critical block method not implemented by PAROPT - //if( retval == __kmp_critical_reduce_block ) { - // if( lck == NULL ) { // critical block method not implemented by PAROPT - // } - //} - - // tune what to return depending on the CPU and platform configuration - // (sometimes tree method is slower than critical) - - // probably tune what to return depending on team size - - // KMP_FORCE_REDUCTION if( __kmp_force_reduction_method != reduction_method_not_defined ) { diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c index b190cce1486..3a0f6ce80b9 100644 --- a/openmp/runtime/src/kmp_settings.c +++ b/openmp/runtime/src/kmp_settings.c @@ -1,7 +1,7 @@ /* * kmp_settings.c -- Initialize environment variables - * $Revision: 42642 $ - * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $ + * $Revision: 42816 $ + * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $ */ @@ -26,9 +26,6 @@ #include "kmp_io.h" -#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) -#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) - static int __kmp_env_isDefined( char const * name ); static int __kmp_env_toPrint( char const * name, int flag ); @@ -3915,7 +3912,7 @@ __kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) || __kmp_str_match( "testandset", 2, value ) ) { __kmp_user_lock_kind = lk_tas; } -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( __kmp_str_match( "futex", 1, value ) ) { if ( __kmp_futex_determine_capable() ) { __kmp_user_lock_kind = lk_futex; @@ -4322,6 +4319,16 @@ __kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void } } // __kmp_stg_print_omp_display_env +static void +__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) { + __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation ); +} // __kmp_stg_parse_omp_cancellation + +static void +__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) { + __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation ); +} // __kmp_stg_print_omp_cancellation + #endif // ------------------------------------------------------------------------------------------------- @@ -4476,6 +4483,7 @@ static kmp_setting_t __kmp_stg_table[] = { # if OMP_40_ENABLED { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 }, + { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 }, #endif { "", NULL, NULL, NULL, 0, 0 } }; // settings diff --git a/openmp/runtime/src/kmp_str.c b/openmp/runtime/src/kmp_str.c index d9b98ab9e30..c1f9e9b6406 100644 --- a/openmp/runtime/src/kmp_str.c +++ b/openmp/runtime/src/kmp_str.c @@ -1,7 +1,7 @@ /* * kmp_str.c -- String manipulation routines. 
- * $Revision: 42613 $ - * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $ + * $Revision: 42810 $ + * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ */ @@ -329,9 +329,9 @@ void __kmp_str_fname_free( kmp_str_fname_t * fname ) { - __kmp_str_free( const_cast< char const ** >( & fname->path ) ); - __kmp_str_free( const_cast< char const ** >( & fname->dir ) ); - __kmp_str_free( const_cast< char const ** >( & fname->base ) ); + __kmp_str_free( (char const **)( & fname->path ) ); + __kmp_str_free( (char const **)( & fname->dir ) ); + __kmp_str_free( (char const **)( & fname->base ) ); } // kmp_str_fname_free diff --git a/openmp/runtime/src/kmp_stub.c b/openmp/runtime/src/kmp_stub.c index e72b196ef72..c1914f4f570 100644 --- a/openmp/runtime/src/kmp_stub.c +++ b/openmp/runtime/src/kmp_stub.c @@ -1,7 +1,7 @@ /* * kmp_stub.c -- stub versions of user-callable OpenMP RT functions. - * $Revision: 42150 $ - * $Date: 2013-03-15 15:40:38 -0500 (Fri, 15 Mar 2013) $ + * $Revision: 42826 $ + * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $ */ @@ -29,11 +29,32 @@ #include <sys/time.h> #endif +#include "omp.h" // Function renamings. #include "kmp.h" // KMP_DEFAULT_STKSIZE #include "kmp_version.h" -#include "omp.h" // Function renamings. +// Moved from omp.h +#if OMP_30_ENABLED +#define omp_set_max_active_levels ompc_set_max_active_levels +#define omp_set_schedule ompc_set_schedule +#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num +#define omp_get_team_size ompc_get_team_size + +#endif // OMP_30_ENABLED + +#define omp_set_num_threads ompc_set_num_threads +#define omp_set_dynamic ompc_set_dynamic +#define omp_set_nested ompc_set_nested +#define kmp_set_stacksize kmpc_set_stacksize +#define kmp_set_stacksize_s kmpc_set_stacksize_s +#define kmp_set_blocktime kmpc_set_blocktime +#define kmp_set_library kmpc_set_library +#define kmp_set_defaults kmpc_set_defaults +#define kmp_malloc kmpc_malloc +#define kmp_calloc kmpc_calloc +#define kmp_realloc kmpc_realloc +#define kmp_free kmpc_free static double frequency = 0.0; @@ -243,29 +264,5 @@ double __kmps_get_wtick( void ) { return wtick; }; // __kmps_get_wtick - -/* - These functions are exported from libraries, but not declared in omp,h and omp_lib.f: - - // omalyshe: eight entries below removed from the library (2011-11-22) - kmpc_get_banner - kmpc_get_poolmode - kmpc_get_poolsize - kmpc_get_poolstat - kmpc_poolprint - kmpc_print_banner - kmpc_set_poolmode - kmpc_set_poolsize - - kmpc_set_affinity - kmp_threadprivate_insert - kmp_threadprivate_insert_private_data - VT_getthid - vtgthid - - The list is collected on lin_32. - -*/ - // end of file // diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c index ea5cdc034af..8cac009b266 100644 --- a/openmp/runtime/src/kmp_tasking.c +++ b/openmp/runtime/src/kmp_tasking.c @@ -1,7 +1,7 @@ /* * kmp_tasking.c -- OpenMP 3.0 tasking support. 
- * $Revision: 42522 $ - * $Date: 2013-07-16 05:28:49 -0500 (Tue, 16 Jul 2013) $ + * $Revision: 42852 $ + * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $ */ @@ -620,13 +620,28 @@ __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_tas #if OMP_40_ENABLED if ( taskdata->td_taskgroup ) KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); - __kmp_release_deps(gtid,taskdata); + __kmp_release_deps(gtid,taskdata); #endif } KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", gtid, taskdata, children) ); +#if OMP_40_ENABLED + /* If the task's destructor thunk flag has been set, we need to invoke the + destructor thunk that has been generated by the compiler. + The code is placed here, since at this point other tasks might have been released, + hence overlapping the destructor invocations with some other work in the + released tasks. The OpenMP spec is not specific on when the destructors are + invoked, so we should be free to choose. + */ + if (taskdata->td_flags.destructors_thunk) { + kmp_routine_entry_t destr_thunk = task->destructors; + KMP_ASSERT(destr_thunk); + destr_thunk(gtid, task); + } +#endif // OMP_40_ENABLED + // bookkeeping for resuming task: // GEH - note tasking_ser => task_serial KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == @@ -739,10 +754,10 @@ __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *te task->td_flags.complete = 0; task->td_flags.freed = 0; -#if OMP_40_ENABLED +#if OMP_40_ENABLED task->td_dephash = NULL; task->td_depnode = NULL; -#endif +#endif if (set_curr_task) { // only do this initialization the first time a thread is created task->td_incomplete_child_tasks = 0; @@ -850,7 +865,7 @@ __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, taskdata->td_task_id = KMP_GEN_TASK_ID(); taskdata->td_team = team; - taskdata->td_alloc_thread = thread; + taskdata->td_alloc_thread = thread; taskdata->td_parent = parent_task; taskdata->td_level = parent_task->td_level + 1; // increment nesting level taskdata->td_ident = loc_ref; @@ -863,6 +878,9 @@ taskdata->td_flags.tiedness = flags->tiedness; taskdata->td_flags.final = flags->final; taskdata->td_flags.merged_if0 = flags->merged_if0; +#if OMP_40_ENABLED + taskdata->td_flags.destructors_thunk = flags->destructors_thunk; +#endif // OMP_40_ENABLED taskdata->td_flags.tasktype = TASK_EXPLICIT; // GEH - TODO: fix this to copy parent task's value of tasking_ser flag @@ -890,7 +908,7 @@ taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task taskdata->td_dephash = NULL; taskdata->td_depnode = NULL; -#endif +#endif // Only need to keep track of child task counts if team parallel and tasking not serialized if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) { KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) ); @@ -946,24 +964,46 @@ static void __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task ) { kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); +#if OMP_40_ENABLED + int discard = 0 /* false */; +#endif KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", gtid, taskdata, current_task) ); __kmp_task_start( gtid, task, current_task ); +#if
OMP_40_ENABLED + // TODO: cancel tasks if the parallel region has also been cancelled + // TODO: check if this sequence can be hoisted above __kmp_task_start + // if cancellation has been enabled for this run ... + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_threads [ gtid ]; + kmp_team_t * this_team = this_thr->th.th_team; + kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; + if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { + // the task's taskgroup or its enclosing parallel region is being cancelled, so discard the task + discard = 1 /* true */; + } + } + // // Invoke the task routine and pass in relevant data. // Thunks generated by gcc take a different argument list. // + if (!discard) { +#endif // OMP_40_ENABLED #ifdef KMP_GOMP_COMPAT - if (taskdata->td_flags.native) { - ((void (*)(void *))(*(task->routine)))(task->shareds); - } - else + if (taskdata->td_flags.native) { + ((void (*)(void *))(*(task->routine)))(task->shareds); + } + else #endif /* KMP_GOMP_COMPAT */ - { - (*(task->routine))(gtid, task); + { + (*(task->routine))(gtid, task); + } +#if OMP_40_ENABLED } +#endif // OMP_40_ENABLED __kmp_task_finish( gtid, task, current_task ); @@ -1079,10 +1119,8 @@ __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ) // GEH: if team serialized, avoid reading the volatile variable below. while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) { __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks), - 0, FALSE, &thread_finished, -#if USE_ITT_BUILD - itt_sync_obj, -#endif /* USE_ITT_BUILD */ + 0, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); } } @@ -1134,10 +1172,8 @@ __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); #endif /* USE_ITT_BUILD */ if ( ! taskdata->td_flags.team_serial ) { - __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished, -#if USE_ITT_BUILD - itt_sync_obj, -#endif /* USE_ITT_BUILD */ + __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); } @@ -1162,7 +1198,7 @@ // __kmpc_taskgroup: Start a new taskgroup void -__kmpc_taskgroup( ident* loc, int gtid ) +__kmpc_taskgroup( ident_t* loc, int gtid ) { kmp_info_t * thread = __kmp_threads[ gtid ]; kmp_taskdata_t * taskdata = thread->th.th_current_task; @@ -1170,6 +1206,7 @@ (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) ); KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) ); tg_new->count = 0; + tg_new->cancel_request = cancel_noreq; tg_new->parent = taskdata->td_taskgroup; taskdata->td_taskgroup = tg_new; } @@ -1180,7 +1217,7 @@ // __kmpc_end_taskgroup: Wait until all tasks generated by the current task // and its descendants are complete void -__kmpc_end_taskgroup( ident* loc, int gtid ) +__kmpc_end_taskgroup( ident_t* loc, int gtid ) { kmp_info_t * thread = __kmp_threads[ gtid ]; kmp_taskdata_t * taskdata = thread->th.th_current_task; @@ -1201,10 +1238,8 @@ if ( !
taskdata->td_flags.team_serial ) { while ( TCR_4(taskgroup->count) != 0 ) { __kmp_execute_tasks( thread, gtid, &(taskgroup->count), - 0, FALSE, &thread_finished, -#if USE_ITT_BUILD - itt_sync_obj, -#endif /* USE_ITT_BUILD */ + 0, FALSE, &thread_finished + USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); } } @@ -1420,15 +1455,13 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team // checker is the value to check to terminate the spin. int -__kmp_execute_tasks( kmp_info_t *thread, - kmp_int32 gtid, +__kmp_execute_tasks( kmp_info_t *thread, + kmp_int32 gtid, volatile kmp_uint *spinner, kmp_uint checker, - int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void * itt_sync_obj, -#endif /* USE_ITT_BUILD */ + int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained ) { kmp_task_team_t * task_team; @@ -2297,11 +2330,9 @@ __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) // in team > 1 ! void -__kmp_task_team_wait( kmp_info_t *this_thr, +__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team -#if USE_ITT_BUILD - , void * itt_sync_obj -#endif /* USE_ITT_BUILD */ + USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { kmp_task_team_t *task_team = team->t.t_task_team; @@ -2320,9 +2351,7 @@ __kmp_task_team_wait( kmp_info_t *this_thr, // termination condition. // __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE -#if USE_ITT_BUILD - , itt_sync_obj -#endif /* USE_ITT_BUILD */ + USE_ITT_BUILD_ARG(itt_sync_obj) ); // @@ -2361,7 +2390,8 @@ __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) #if USE_ITT_BUILD KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL ); #endif /* USE_ITT_BUILD */ - while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) { + while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag + USE_ITT_BUILD_ARG(NULL), 0 ) ) { #if USE_ITT_BUILD // TODO: What about itt_sync_obj?? KMP_FSYNC_SPIN_PREPARE( spin ); diff --git a/openmp/runtime/src/kmp_version.c b/openmp/runtime/src/kmp_version.c index 0beb824803a..5d0de7716c0 100644 --- a/openmp/runtime/src/kmp_version.c +++ b/openmp/runtime/src/kmp_version.c @@ -1,7 +1,7 @@ /* * kmp_version.c - * $Revision: 42594 $ - * $Date: 2013-08-16 04:14:33 -0500 (Fri, 16 Aug 2013) $ + * $Revision: 42806 $ + * $Date: 2013-11-05 16:16:45 -0600 (Tue, 05 Nov 2013) $ */ @@ -27,7 +27,7 @@ #define stringer( x ) _stringer( x ) // Detect compiler. -#ifdef __INTEL_COMPILER +#if KMP_COMPILER_ICC #if __INTEL_COMPILER == 1010 #define KMP_COMPILER "Intel C++ Compiler 10.1" #elif __INTEL_COMPILER == 1100 @@ -49,7 +49,9 @@ #elif __INTEL_COMPILER == 9999 #define KMP_COMPILER "Intel C++ Compiler mainline" #endif -#elif defined( __GNUC__ ) +#elif KMP_COMPILER_CLANG + #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ ) +#elif KMP_COMPILER_GCC #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." 
stringer( __GNUC_MINOR__ ) #endif #ifndef KMP_COMPILER diff --git a/openmp/runtime/src/makefile.mk b/openmp/runtime/src/makefile.mk index d7c826685e7..8185e78736b 100644 --- a/openmp/runtime/src/makefile.mk +++ b/openmp/runtime/src/makefile.mk @@ -1,6 +1,6 @@ # makefile.mk # -# $Revision: 42661 $ -# $Date: 2013-09-12 11:37:13 -0500 (Thu, 12 Sep 2013) $ +# $Revision: 42820 $ +# $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $ # #//===----------------------------------------------------------------------===// @@ -37,7 +37,7 @@ inc_dir = $(LIBOMP_WORK)src/include/$(OMP_VERSION)/ # -------------------------------------------------------------------------------------------------- # Build compiler -BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc icl icl.exe) +BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc clang icl icl.exe) # Distribution type: com (commercial) or oss (open-source) DISTRIBUTION := $(call check_variable,DISTRIBUTION,com oss) @@ -161,6 +161,18 @@ ifeq "$(c)" "gcc" endif endif +ifeq "$(c)" "clang" + c-flags += -Wno-unused-value -Wno-switch + cxx-flags += -Wno-unused-value -Wno-switch + ifeq "$(arch)" "32" + c-flags += -m32 -msse + cxx-flags += -m32 -msse + fort-flags += -m32 -msse + ld-flags += -m32 -msse + as-flags += -m32 -msse + endif +endif + ifeq "$(LINK_TYPE)" "dyna" # debug-info ifeq "$(os)" "win" @@ -186,7 +198,7 @@ ifeq "$(os)" "win" endif # Enable saving compiler options and version in object files and libraries. -ifneq "$(c)" "gcc" +ifeq "$(filter gcc clang,$(c))" "" ifeq "$(os)" "win" # Newer MS linker issues warnings if -Qsox is used: # "warning LNK4224: /COMMENT is no longer supported; ignored" @@ -231,24 +243,17 @@ endif # Disable use of EBP as general purpose register. ifeq "$(os)" "win" ifeq "$(arch)" "32" - # ??? In original makefile, this option was specified only in debug builds. - # Compare with Linux* OS/OS X* -fno-omit-frame-pointer, which defined always. c-flags += -Oy- cxx-flags += -Oy- endif -else - ifneq "$(arch)" "64" - c-flags += -fno-omit-frame-pointer - cxx-flags += -fno-omit-frame-pointer - endif endif ifeq "$(os)" "lin" c-flags += -Wsign-compare cxx-flags += -Wsign-compare ld-flags += -Wsign-compare - ifneq "$(c)" "gcc" - c-flags += -Werror + ifeq "$(filter gcc clang,$(c))" "" + c-flags += -Werror cxx-flags += -Werror ld-flags += -Werror endif @@ -306,7 +311,7 @@ ifeq "$(CPLUSPLUS)" "on" ifeq "$(os)" "win" c-flags += -TP else - ifeq "$(c)" "gcc" + ifneq "$(filter gcc clang,$(c))" "" c-flags += -x c++ -std=c++0x else c-flags += -Kc++ @@ -352,12 +357,18 @@ ifeq "$(os)" "lin" ld-flags-dll += -static-libgcc ld-flags-extra += -Wl,-ldl endif + ifeq "$(c)" "clang" + ld-flags-extra += -Wl,-ldl + endif ifeq "$(arch)" "32" - ifneq "$(c)" "gcc" + ifeq "$(filter gcc clang,$(c))" "" # to workaround CQ215229 link libirc_pic manually ld-flags-extra += -lirc_pic endif endif + ifeq "$(filter 32 32e 64,$(arch))" "" + ld-flags-extra += $(shell pkg-config --libs libffi) + endif else ifeq "$(arch)" "32e" # ??? @@ -452,13 +463,13 @@ cpp-flags += -D KMP_VERSION_MAJOR=$(VERSION) cpp-flags += -D CACHE_LINE=64 cpp-flags += -D KMP_ADJUST_BLOCKTIME=1 cpp-flags += -D BUILD_PARALLEL_ORDERED +cpp-flags += -D KMP_ASM_INTRINS ifneq "$(os)" "lrb" cpp-flags += -D USE_LOAD_BALANCE endif ifneq "$(os)" "win" cpp-flags += -D USE_CBLKDATA # ??? Windows* OS: USE_CBLKDATA defined in kmp.h. 
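# (Note: KMP_ASM_INTRINS is now defined unconditionally in the common block
# above; the removal below only drops the old POSIX-only definition.)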
- cpp-flags += -D KMP_ASM_INTRINS endif ifeq "$(os)" "win" cpp-flags += -D KMP_WIN_CDECL @@ -477,22 +488,42 @@ else # 5 endif endif +ifneq "$(filter 32 32e,$(arch))" "" cpp-flags += -D KMP_USE_ADAPTIVE_LOCKS=1 -D KMP_DEBUG_ADAPTIVE_LOCKS=0 +endif -# define compatibility with OMP 3.0 +# define compatibility with different OpenMP versions +have_omp_50=0 +have_omp_41=0 +have_omp_40=0 +have_omp_30=0 +ifeq "$(OMP_VERSION)" "50" + have_omp_50=1 + have_omp_41=1 + have_omp_40=1 + have_omp_30=1 +endif +ifeq "$(OMP_VERSION)" "41" + have_omp_50=0 + have_omp_41=1 + have_omp_40=1 + have_omp_30=1 +endif ifeq "$(OMP_VERSION)" "40" - cpp-flags += -D OMP_40_ENABLED=1 - cpp-flags += -D OMP_30_ENABLED=1 -else - ifeq "$(OMP_VERSION)" "30" - cpp-flags += -D OMP_40_ENABLED=0 - cpp-flags += -D OMP_30_ENABLED=1 - else - cpp-flags += -D OMP_40_ENABLED=0 - cpp-flags += -D OMP_30_ENABLED=0 - # TODO: Check OMP_30_ENABLED == 0 is processed correctly. - endif + have_omp_50=0 + have_omp_41=0 + have_omp_40=1 + have_omp_30=1 +endif +ifeq "$(OMP_VERSION)" "30" + have_omp_50=0 + have_omp_41=0 + have_omp_40=0 + have_omp_30=1 endif +cpp-flags += -D OMP_50_ENABLED=$(have_omp_50) -D OMP_41_ENABLED=$(have_omp_41) +cpp-flags += -D OMP_40_ENABLED=$(have_omp_40) -D OMP_30_ENABLED=$(have_omp_30) + # Using ittnotify is enabled by default. USE_ITT_NOTIFY = 1 @@ -541,8 +572,13 @@ endif # only one, target architecture). So we cannot autodetect target architecture # within the file, and have to pass target architecture from command line. ifneq "$(os)" "win" - z_Linux_asm$(obj) : \ - cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64) + ifeq "$(arch)" "arm" + z_Linux_asm$(obj) : \ + cpp-flags += -D KMP_ARCH_ARM + else + z_Linux_asm$(obj) : \ + cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64) + endif endif # Defining KMP_BUILD_DATE for all files leads to warning "incompatible redefinition", because the @@ -606,7 +642,6 @@ ld-flags += $(LDFLAGS) lib_c_items := \ kmp_ftn_cdecl \ kmp_ftn_extra \ - kmp_ftn_stdcall \ kmp_version \ $(empty) lib_cpp_items := @@ -653,6 +688,7 @@ else # norm or prof ifeq "$(OMP_VERSION)" "40" lib_cpp_items += kmp_taskdeps + lib_cpp_items += kmp_cancel endif # OS-specific files. @@ -1214,7 +1250,9 @@ ifneq "$(os)" "lrb" tt-c-flags += -pthread endif tt-c-flags += -o $(tt-exe-file) - tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64)) + ifneq "$(filter 32 32e 64,$(arch))" "" + tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64)) + endif tt-libs += $(lib_file) ifeq "$(os)-$(COVERAGE)-$(LINK_TYPE)" "lin-on-stat" # Static coverage build on Linux* OS fails due to unresolved symbols dlopen, dlsym, dlclose. 
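The OMP_VERSION handling rewritten above replaces the nested conditionals with four independent have_omp_* switches; spelled out, the mapping the new block computes is:

# OMP_VERSION=50 -> OMP_50_ENABLED=1  OMP_41_ENABLED=1  OMP_40_ENABLED=1  OMP_30_ENABLED=1
# OMP_VERSION=41 -> OMP_50_ENABLED=0  OMP_41_ENABLED=1  OMP_40_ENABLED=1  OMP_30_ENABLED=1
# OMP_VERSION=40 -> OMP_50_ENABLED=0  OMP_41_ENABLED=0  OMP_40_ENABLED=1  OMP_30_ENABLED=1
# OMP_VERSION=30 -> OMP_50_ENABLED=0  OMP_41_ENABLED=0  OMP_40_ENABLED=0  OMP_30_ENABLED=1

That is, each version enables itself and everything older.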
@@ -1343,8 +1381,16 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" "" ifeq "$(arch)" "64" td_exp += libc.so.6.1 endif + ifeq "$(arch)" "arm" + td_exp += libc.so.6 + td_exp += ld-linux-armhf.so.3 + endif td_exp += libdl.so.2 td_exp += libgcc_s.so.1 + ifeq "$(filter 32 32e 64,$(arch))" "" + td_exp += libffi.so.6 + td_exp += libffi.so.5 + endif ifneq "$(LIB_TYPE)" "stub" td_exp += libpthread.so.0 endif diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h index 9cc398c0b0e..9df6e2f3980 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h @@ -109,12 +109,18 @@ The same ID may not be reused for different instances, unless a previous # define ITT_PLATFORM_POSIX 2 #endif /* ITT_PLATFORM_POSIX */ +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC # else # define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif /* _WIN32 */ +# endif #endif /* ITT_PLATFORM */ #if defined(_UNICODE) && !defined(UNICODE) @@ -135,11 +141,11 @@ The same ID may not be reused for different instances, unless a previous # if ITT_PLATFORM==ITT_PLATFORM_WIN # define CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define CDECL /* not actual on x86_64 platform */ -# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# if defined _M_IX86 || defined __i386__ # define CDECL __attribute__ ((cdecl)) -# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# else /* _M_IX86 || __i386__ */ +# define CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* CDECL */ @@ -147,11 +153,11 @@ The same ID may not be reused for different instances, unless a previous # if ITT_PLATFORM==ITT_PLATFORM_WIN # define STDCALL __stdcall # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define STDCALL /* not supported on x86_64 platform */ -# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# if defined _M_IX86 || defined __i386__ # define STDCALL __attribute__ ((stdcall)) -# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* STDCALL */ @@ -164,8 +170,8 @@ The same ID may not be reused for different instances, unless a previous #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ -#define INLINE __forceinline -#define INLINE_ATTRIBUTE /* nothing */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* * Generally, functions are not inlined unless optimization is specified. @@ -173,11 +179,11 @@ The same ID may not be reused for different instances, unless a previous * if no optimization level was specified. 
*/ #ifdef __STRICT_ANSI__ -#define INLINE static +#define ITT_INLINE static #else /* __STRICT_ANSI__ */ -#define INLINE static inline +#define ITT_INLINE static inline #endif /* __STRICT_ANSI__ */ -#define INLINE_ATTRIBUTE __attribute__ ((always_inline)) +#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused)) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /** @endcond */ @@ -398,6 +404,128 @@ ITT_STUBV(ITTAPI, void, thread_ignore, (void)) /** @} threads group */ /** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** + * @hideinitializer + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_suppress_mode + * @brief Enumerator for the suppression modes + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define 
__itt_suppress_mark_range(mode, mask, address, size) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching + * call is found, nothing is changed. + */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_clear_range(mode, mask, address, size) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** * @defgroup sync Synchronization * @ingroup public * Indicate user-written synchronization code @@ -820,8 +948,10 @@ void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_inst #if ITT_PLATFORM==ITT_PLATFORM_WIN void ITTAPI __itt_model_site_beginW(const wchar_t *name); #endif +void ITTAPI __itt_model_site_beginA(const char *name); void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY @@ -830,18 +960,24 @@ ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_ #if ITT_PLATFORM==ITT_PLATFORM_WIN ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) #endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) #define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) #define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) #if ITT_PLATFORM==ITT_PLATFORM_WIN #define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) #define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) #endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) #define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) #define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) #define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) #define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_model_site_begin(site, instance, name) #define __itt_model_site_begin_ptr 0 @@ -849,18 +985,24 @@ ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_ #define __itt_model_site_beginW(name) #define __itt_model_site_beginW_ptr 0 #endif +#define __itt_model_site_beginA(name) +#define 
__itt_model_site_beginA_ptr 0 #define __itt_model_site_beginAL(name, siteNameLen) #define __itt_model_site_beginAL_ptr 0 #define __itt_model_site_end(site, instance) #define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ #define __itt_model_site_begin_ptr 0 #if ITT_PLATFORM==ITT_PLATFORM_WIN #define __itt_model_site_beginW_ptr 0 #endif +#define __itt_model_site_beginA_ptr 0 #define __itt_model_site_beginAL_ptr 0 #define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ @@ -878,9 +1020,14 @@ ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_ void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); #if ITT_PLATFORM==ITT_PLATFORM_WIN void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); #endif +void ITTAPI __itt_model_task_beginA(const char *name); void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY @@ -888,19 +1035,34 @@ void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_inst ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) #if ITT_PLATFORM==ITT_PLATFORM_WIN ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) #endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) #define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) #define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) #if ITT_PLATFORM==ITT_PLATFORM_WIN #define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) #define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW) #endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) #define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) #define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) #define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) #define __itt_model_task_end_ptr 
ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_model_task_begin(task, instance, name) #define __itt_model_task_begin_ptr 0 @@ -908,18 +1070,30 @@ ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_ #define __itt_model_task_beginW(name) #define __itt_model_task_beginW_ptr 0 #endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 #define __itt_model_task_beginAL(name, siteNameLen) #define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, siteNameLen) +#define __itt_model_iteration_taskAL_ptr 0 #define __itt_model_task_end(task, instance) #define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ #define __itt_model_task_begin_ptr 0 #if ITT_PLATFORM==ITT_PLATFORM_WIN #define __itt_model_task_beginW_ptr 0 #endif +#define __itt_model_task_beginA_ptr 0 #define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL_ptr 0 #define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ @@ -936,26 +1110,40 @@ ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_ * but may not have identical semantics.) */ void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY #ifndef INTEL_NO_ITTNOTIFY_API ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) #define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) #define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) #define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) #define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_model_lock_acquire(lock) #define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 #define __itt_model_lock_release(lock) #define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ #define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 #define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ @@ -1104,25 +1292,32 @@ ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) */ void ITTAPI __itt_model_disable_push(__itt_model_disable x); void ITTAPI __itt_model_disable_pop(void); +void 
ITTAPI __itt_model_aggregate_task(size_t x); /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY #ifndef INTEL_NO_ITTNOTIFY_API ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) #define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) #define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) #define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) #define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_model_disable_push(x) #define __itt_model_disable_push_ptr 0 #define __itt_model_disable_pop() #define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ #define __itt_model_disable_push_ptr 0 #define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ /** @} model group */ @@ -1348,9 +1543,97 @@ ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) #define __itt_heap_internal_access_end_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ -/** @} heap group */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. + */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. 
+ */ +#define __itt_heap_growth 0x00000002 + + +/** @brief heap reset detection */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reset_detection(reset_mask) +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief report */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record(record_mask) +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ /* ========================================================================== */ /** @@ -1475,8 +1758,8 @@ static const __itt_id __itt_null = { 0, 0, 0 }; * @param[in] extra The extra data to unique identify object; low QWORD of the ID value. */ -INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) INLINE_ATTRIBUTE; -INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) { __itt_id id = __itt_null; id.d1 = (unsigned long long)((uintptr_t)addr); @@ -1633,6 +1916,40 @@ ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *na /** @endcond */ /** @} handles group */ +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL; + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return a timestamp corresponding to the current moment. + * The timestamp is returned in the format that is most relevant for the current + * host or platform. Do not rely on it being an RDTSC value. It is possible + * to compare __itt_timestamp values with the "<" operator. 
+ */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_get_timestamp() +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + /** @cond exclude_from_gpa_documentation */ /** @@ -1717,24 +2034,46 @@ void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); */ void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing the special __itt_timestamp_none value as the "end" argument means + * that the current timestamp is used as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY #ifndef INTEL_NO_ITTNOTIFY_API ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) #define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) #define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) #define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) #define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_frame_begin_v3(domain,id) #define __itt_frame_begin_v3_ptr 0 #define __itt_frame_end_v3(domain,id) #define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ #define __itt_frame_begin_v3_ptr 0 #define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ /** @} frames group */ @@ -2730,8 +3069,125 @@ ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ /** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of array data (for C/C++ intrinsic types) + */ +typedef enum +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 
4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp for 2-dimensional arrays only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. + * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 for row-major order (e.g. in C). + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(name) +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(name) +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(name) +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ +void ITTAPI 
__itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h index bccaa383ef0..40c8614d222 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -42,12 +42,18 @@ # define ITT_PLATFORM_POSIX 2 #endif /* ITT_PLATFORM_POSIX */ +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC # else # define ITT_PLATFORM ITT_PLATFORM_POSIX -# endif /* _WIN32 */ +# endif #endif /* ITT_PLATFORM */ #if defined(_UNICODE) && !defined(UNICODE) @@ -68,11 +74,11 @@ # if ITT_PLATFORM==ITT_PLATFORM_WIN # define CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define CDECL /* not actual on x86_64 platform */ -# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# if defined _M_IX86 || defined __i386__ # define CDECL __attribute__ ((cdecl)) -# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# else /* _M_IX86 || __i386__ */ +# define CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* CDECL */ @@ -80,11 +86,11 @@ # if ITT_PLATFORM==ITT_PLATFORM_WIN # define STDCALL __stdcall # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ -# define STDCALL /* not supported on x86_64 platform */ -# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# if defined _M_IX86 || defined __i386__ # define STDCALL __attribute__ ((stdcall)) -# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* STDCALL */ @@ -97,8 +103,8 @@ #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ -#define INLINE __forceinline -#define INLINE_ATTRIBUTE /* nothing */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* * Generally, functions are not inlined unless optimization is specified. @@ -106,11 +112,11 @@ * if no optimization level was specified. 
*/ #ifdef __STRICT_ANSI__ -#define INLINE static +#define ITT_INLINE static #else /* __STRICT_ANSI__ */ -#define INLINE static inline +#define ITT_INLINE static inline #endif /* __STRICT_ANSI__ */ -#define INLINE_ATTRIBUTE __attribute__ ((always_inline)) +#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused)) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /** @endcond */ @@ -122,17 +128,19 @@ # define ITT_ARCH_IA32E 2 #endif /* ITT_ARCH_IA32E */ -#ifndef ITT_ARCH_IA64 -# define ITT_ARCH_IA64 3 -#endif /* ITT_ARCH_IA64 */ +#ifndef ITT_ARCH_ARM +# define ITT_ARCH_ARM 4 +#endif /* ITT_ARCH_ARM */ #ifndef ITT_ARCH -# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# if defined _M_IX86 || defined __i386__ +# define ITT_ARCH ITT_ARCH_IA32 +# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ # define ITT_ARCH ITT_ARCH_IA32E -# elif defined _M_IA64 || defined __ia64 +# elif defined _M_IA64 || defined __ia64__ # define ITT_ARCH ITT_ARCH_IA64 -# else -# define ITT_ARCH ITT_ARCH_IA32 +# elif defined _M_ARM || __arm__ +# define ITT_ARCH ITT_ARCH_ARM # endif #endif @@ -145,7 +153,10 @@ #define ITT_TO_STR_AUX(x) #x #define ITT_TO_STR(x) ITT_TO_STR_AUX(x) -#define __ITT_BUILD_ASSERT(expr, suffix) do { static char __itt_build_check_##suffix[(expr) ? 1 : -1]; __itt_build_check_##suffix[0] = 0; } while(0) +#define __ITT_BUILD_ASSERT(expr, suffix) do { \ + static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ + __itt_build_check_##suffix[0] = 0; \ +} while(0) #define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) #define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) @@ -158,7 +169,8 @@ #define API_VERSION_NUM 0.0.0 #endif /* API_VERSION_NUM */ -#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) " (" ITT_TO_STR(API_VERSION_BUILD) ")" +#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ + " (" ITT_TO_STR(API_VERSION_BUILD) ")" /* OS communication functions */ #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -176,12 +188,16 @@ typedef CRITICAL_SECTION mutex_t; #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ #endif /* _GNU_SOURCE */ +#ifndef __USE_UNIX98 +#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */ +#endif /*__USE_UNIX98*/ #include <pthread.h> typedef void* lib_t; typedef pthread_t TIDT; typedef pthread_mutex_t mutex_t; #define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#define _strong_alias(name, aliasname) extern __typeof (name) aliasname __attribute__ ((alias (#name))); +#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); #define strong_alias(name, aliasname) _strong_alias(name, aliasname) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -200,29 +216,35 @@ typedef pthread_mutex_t mutex_t; #define __itt_thread_id() GetCurrentThreadId() #define __itt_thread_yield() SwitchToThread() #ifndef ITT_SIMPLE_INIT -INLINE int __itt_interlocked_increment(volatile long* ptr) +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { return InterlockedIncrement(ptr); } #endif /* ITT_SIMPLE_INIT */ #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ #define __itt_get_proc(lib, name) dlsym(lib, name) -#define __itt_mutex_init(mutex) \ - { \ - pthread_mutexattr_t mutex_attr; \ - int error_code = pthread_mutexattr_init(&mutex_attr); \ - if (error_code) \ - 
__itt_report_error(__itt_error_system, "pthread_mutexattr_init", error_code); \ - error_code = pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", error_code); \ - error_code = pthread_mutex_init(mutex, &mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutex_init", error_code); \ - error_code = pthread_mutexattr_destroy(&mutex_attr); \ - if (error_code) \ - __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", error_code); \ - } +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} #define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) #define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) #define __itt_load_lib(name) dlopen(name, RTLD_LAZY) @@ -238,23 +260,29 @@ INLINE int __itt_interlocked_increment(volatile long* ptr) #ifdef __INTEL_COMPILER #define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) #else /* __INTEL_COMPILER */ -/* TODO: Add Support for not Intel compilers for IA64 */ +/* TODO: Add Support for not Intel compilers for IA-64 architecture */ #endif /* __INTEL_COMPILER */ -#else /* ITT_ARCH!=ITT_ARCH_IA64 */ -INLINE int __TBB_machine_fetchadd4(volatile void* ptr, long addend) +#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) { - int result; - __asm__ __volatile__("lock\nxaddl %0,%1" - : "=r"(result),"=m"(*(long*)ptr) - : "0"(addend), "m"(*(long*)ptr) + long result; + __asm__ __volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(int*)ptr) + : "0"(addend), "m"(*(int*)ptr) : "memory"); return result; } +#elif ITT_ARCH==ITT_ARCH_ARM +#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) #endif /* ITT_ARCH==ITT_ARCH_IA64 */ #ifndef ITT_SIMPLE_INIT -INLINE int __itt_interlocked_increment(volatile long* ptr) +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { - return __TBB_machine_fetchadd4(ptr, 1) + 1; + return __TBB_machine_fetchadd4(ptr, 1) + 1L; } #endif /* ITT_SIMPLE_INIT */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c index 5257d0d0299..4b5f464feb8 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c @@ -29,7 +29,7 @@ #include "disable_warnings.h" -static const char api_version[] = API_VERSION "\0\n@(#) 201495 2011-12-01 14:14:56Z\n"; +static const char 
api_version[] = API_VERSION "\0\n@(#) $Revision: 42754 $\n"; #define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) @@ -43,6 +43,12 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; #error Unsupported or unknown OS. #endif +#ifdef __ANDROID__ +/* default location of userapi collector on Android */ +#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so" +#endif + + #ifndef LIB_VAR_NAME #if ITT_ARCH==ITT_ARCH_IA32 #define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 @@ -146,7 +152,7 @@ typedef struct ___itt_group_alias static __itt_group_alias group_alias[] = { { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, - { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark) }, + { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, { NULL, (__itt_group_none) }, { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */ }; @@ -162,7 +168,7 @@ static __itt_api_info api_list[] = { /* Define functions with static implementation */ #undef ITT_STUB #undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, #define ITT_STUBV ITT_STUB #define __ITT_INTERNAL_INIT #include "ittnotify_static.h" @@ -170,7 +176,7 @@ static __itt_api_info api_list[] = { /* Define functions without static implementation */ #undef ITT_STUB #undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, #define ITT_STUBV ITT_STUB #include "ittnotify_static.h" {NULL, NULL, NULL, NULL, __itt_group_none} @@ -225,7 +231,7 @@ static __itt_string_handle null_string_handle = { static const char dll_path[PATH_MAX] = { 0 }; /* static part descriptor which handles. all notification api attributes. */ -static __itt_global __itt_ittapi_global = { +__itt_global _N_(_ittapi_global) = { ITT_MAGIC, /* identification info */ ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ 0, /* api_initialized */ @@ -261,9 +267,9 @@ static void __itt_report_error(__itt_error_code code, ...) 
{ va_list args; va_start(args, code); - if (__itt_ittapi_global.error_handler != NULL) + if (_N_(_ittapi_global).error_handler != NULL) { - __itt_error_handler_t* handler = (__itt_error_handler_t*)__itt_ittapi_global.error_handler; + __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; handler(code, args); } #ifdef ITT_NOTIFY_EXT_REPORT @@ -281,7 +287,7 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))( { __itt_domain *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) @@ -289,16 +295,16 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))( } if (name == NULL) - return __itt_ittapi_global.domain_list; + return _N_(_ittapi_global).domain_list; - ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global); - for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next) + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; if (h == NULL) { - NEW_DOMAIN_W(&__itt_ittapi_global,h,h_tail,name); + NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -309,7 +315,7 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c { __itt_domain *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -322,16 +328,16 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c } if (name == NULL) - return __itt_ittapi_global.domain_list; + return _N_(_ittapi_global).domain_list; - ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global); - for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next) + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; if (h == NULL) { - NEW_DOMAIN_A(&__itt_ittapi_global,h,h_tail,name); + NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -340,7 +346,7 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre { __itt_string_handle *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) @@ -348,16 +354,16 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre } if (name == NULL) - return __itt_ittapi_global.string_list; + return _N_(_ittapi_global).string_list; - 
ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global); - for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next) + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) if (h->strW != NULL && !wcscmp(h->strW, name)) break; if (h == NULL) { - NEW_STRING_HANDLE_W(&__itt_ittapi_global,h,h_tail,name); + NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -368,7 +374,7 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre { __itt_string_handle *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -381,16 +387,16 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre } if (name == NULL) - return __itt_ittapi_global.string_list; + return _N_(_ittapi_global).string_list; - ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global); - for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next) + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; if (h == NULL) { - NEW_STRING_HANDLE_A(&__itt_ittapi_global,h,h_tail,name); + NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -398,7 +404,7 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) { - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) @@ -407,12 +413,12 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) return; } } - __itt_ittapi_global.state = __itt_collection_paused; + _N_(_ittapi_global).state = __itt_collection_paused; } static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) { - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) @@ -421,7 +427,7 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) return; } } - __itt_ittapi_global.state = __itt_collection_normal; + _N_(_ittapi_global).state = __itt_collection_normal; } #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -430,7 +436,7 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const w TIDT tid = __itt_thread_id(); __itt_thread_info *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if 
(ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) @@ -440,18 +446,18 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const w } } - __itt_mutex_lock(&__itt_ittapi_global.mutex); - for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next) + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) if (h->tid == tid) break; if (h == NULL) { - NEW_THREAD_INFO_W(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name); + NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); } else { h->nameW = name ? _wcsdup(name) : NULL; } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); } static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) @@ -469,7 +475,7 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const ch TIDT tid = __itt_thread_id(); __itt_thread_info *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -487,18 +493,18 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const ch #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ } - __itt_mutex_lock(&__itt_ittapi_global.mutex); - for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next) + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) if (h->tid == tid) break; if (h == NULL) { - NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name); + NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name); } else { h->nameA = name ? 
__itt_fstrdup(name) : NULL; } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); } #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -522,7 +528,7 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) TIDT tid = __itt_thread_id(); __itt_thread_info *h_tail, *h; - if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0) + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0) { __itt_init_ittlib_name(NULL, __itt_group_all); if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) @@ -532,19 +538,19 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) } } - __itt_mutex_lock(&__itt_ittapi_global.mutex); - for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next) + __itt_mutex_lock(&_N_(_ittapi_global).mutex); + for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next) if (h->tid == tid) break; if (h == NULL) { static const char* name = "unknown"; - NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_ignored, name); + NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name); } else { h->state = __itt_thread_ignored; } - __itt_mutex_unlock(&__itt_ittapi_global.mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); } static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) @@ -552,6 +558,17 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) { ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); } +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) +{ +#ifdef __ANDROID__ + /* + * if the LIB_VAR_NAME env variable was set before, keep the previous value; + * otherwise set the default path + */ + setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); +#endif +} + /* -------------------------------------------------------------------------- */ static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) @@ -666,80 +683,10 @@ static const char* __itt_get_env_var(const char* name) return NULL; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN - -#include <Winreg.h> - -typedef LONG (APIENTRY* RegCloseKeyProcType)(HKEY); -typedef LONG (APIENTRY* RegOpenKeyExAProcType)(HKEY, LPCTSTR, DWORD, REGSAM, PHKEY); -typedef LONG (APIENTRY* RegGetValueAProcType)(HKEY, LPCTSTR, LPCTSTR, DWORD, LPDWORD, PVOID, LPDWORD); - -/* This function return value of registry key that placed into static buffer. - * This was done to aviod dynamic memory allocation. 
- */ -static const char* __itt_get_lib_name_registry(void) -{ -#define MAX_REG_VALUE_SIZE 4086 - static char reg_buff[MAX_REG_VALUE_SIZE]; - DWORD size; - LONG res; - HKEY hKey; - RegCloseKeyProcType pRegCloseKey; - RegOpenKeyExAProcType pRegOpenKeyExA; - RegGetValueAProcType pRegGetValueA; - HMODULE h_advapi32 = LoadLibraryA("advapi32.dll"); - DWORD autodetect = 0; - - if (h_advapi32 == NULL) - { - return NULL; - } - - pRegCloseKey = (RegCloseKeyProcType)GetProcAddress(h_advapi32, "RegCloseKey"); - pRegOpenKeyExA = (RegOpenKeyExAProcType)GetProcAddress(h_advapi32, "RegOpenKeyExA"); - pRegGetValueA = (RegGetValueAProcType)GetProcAddress(h_advapi32, "RegGetValueA"); - - if (pRegCloseKey == NULL || - pRegOpenKeyExA == NULL || - pRegGetValueA == NULL) - { - FreeLibrary(h_advapi32); - return NULL; - } - - res = pRegOpenKeyExA(HKEY_CURRENT_USER, (LPCTSTR)"Software\\Intel Corporation\\ITT Environment\\Collector", 0, KEY_READ, &hKey); - if (res != ERROR_SUCCESS || hKey == 0) - { - FreeLibrary(h_advapi32); - return NULL; - } - - size = sizeof(DWORD); - res = pRegGetValueA(hKey, (LPCTSTR)"AutoDetect", NULL, RRF_RT_REG_DWORD, NULL, (BYTE*)&autodetect, &size); - if (res != ERROR_SUCCESS || size == 0 || autodetect == 0) - { - pRegCloseKey(hKey); - FreeLibrary(h_advapi32); - return NULL; - } - - size = MAX_REG_VALUE_SIZE-1; - res = pRegGetValueA(hKey, (LPCTSTR)ITT_TO_STR(LIB_VAR_NAME), NULL, REG_SZ, NULL, (BYTE*)®_buff, &size); - pRegCloseKey(hKey); - FreeLibrary(h_advapi32); - - return (res == ERROR_SUCCESS && size > 0) ? reg_buff : NULL; -} - -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - static const char* __itt_get_lib_name(void) { const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (lib_name == NULL) - lib_name = __itt_get_lib_name_registry(); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + return lib_name; } @@ -761,9 +708,8 @@ static __itt_group_id __itt_get_groups(void) const char* chunk; while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) { - __itt_fstrcpyn(gr, chunk, sizeof(gr)); - - gr[min((unsigned int)len, sizeof(gr) - 1)] = 0; + __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1); + gr[min(len, (int)(sizeof(gr) - 1))] = 0; for (i = 0; group_list[i].name != NULL; i++) { @@ -810,8 +756,8 @@ static void __itt_reinit_all_pointers(void) { register int i; // Fill all pointers with initial stubs - for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++) - *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].init_func; + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; } */ @@ -819,8 +765,8 @@ static void __itt_nullify_all_pointers(void) { register int i; /* Nulify all pointers except domain_create and string_handle_create */ - for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++) - *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func; + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; } #if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -834,30 +780,30 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void) __itt_api_fini_t* __itt_api_fini_ptr; static volatile TIDT current_thread = 0; - if (__itt_ittapi_global.api_initialized) + if (_N_(_ittapi_global).api_initialized) { - __itt_mutex_lock(&__itt_ittapi_global.mutex); - if 
@@ -810,8 +756,8 @@ static void __itt_reinit_all_pointers(void)
 {
     register int i;
     // Fill all pointers with initial stubs
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].init_func;
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
 }
 */
 
@@ -819,8 +765,8 @@ static void __itt_nullify_all_pointers(void)
 {
     register int i;
     /* Nullify all pointers except domain_create and string_handle_create */
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -834,30 +780,30 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void)
     __itt_api_fini_t* __itt_api_fini_ptr;
     static volatile TIDT current_thread = 0;
 
-    if (__itt_ittapi_global.api_initialized)
+    if (_N_(_ittapi_global).api_initialized)
     {
-        __itt_mutex_lock(&__itt_ittapi_global.mutex);
-        if (__itt_ittapi_global.api_initialized)
+        __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+        if (_N_(_ittapi_global).api_initialized)
         {
             if (current_thread == 0)
             {
                 current_thread = __itt_thread_id();
-                __itt_api_fini_ptr = (__itt_api_fini_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_fini");
+                __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
                 if (__itt_api_fini_ptr)
-                    __itt_api_fini_ptr(&__itt_ittapi_global);
+                    __itt_api_fini_ptr(&_N_(_ittapi_global));
 
                 __itt_nullify_all_pointers();
 
 /* TODO: !!! not safe !!! don't support unload so far.
- *             if (__itt_ittapi_global.lib != NULL)
- *                 __itt_unload_lib(__itt_ittapi_global.lib);
- *             __itt_ittapi_global.lib = NULL;
+ *             if (_N_(_ittapi_global).lib != NULL)
+ *                 __itt_unload_lib(_N_(_ittapi_global).lib);
+ *             _N_(_ittapi_global).lib = NULL;
 */
-                __itt_ittapi_global.api_initialized = 0;
+                _N_(_ittapi_global).api_initialized = 0;
                 current_thread = 0;
             }
         }
-        __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+        __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     }
 }
 
@@ -870,51 +816,52 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
 #endif /* ITT_COMPLETE_GROUP */
     static volatile TIDT current_thread = 0;
 
-    if (!__itt_ittapi_global.api_initialized)
+    if (!_N_(_ittapi_global).api_initialized)
     {
 #ifndef ITT_SIMPLE_INIT
-        ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
+        ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
 #endif /* ITT_SIMPLE_INIT */
 
-        if (!__itt_ittapi_global.api_initialized)
+        if (!_N_(_ittapi_global).api_initialized)
         {
             if (current_thread == 0)
            {
                 current_thread = __itt_thread_id();
-                __itt_ittapi_global.thread_list->tid = current_thread;
+                _N_(_ittapi_global).thread_list->tid = current_thread;
                 if (lib_name == NULL)
                     lib_name = __itt_get_lib_name();
                 groups = __itt_get_groups();
                 if (groups != __itt_group_none || lib_name != NULL)
                 {
-                    __itt_ittapi_global.lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
-                    if (__itt_ittapi_global.lib != NULL)
+                    _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
+
+                    if (_N_(_ittapi_global).lib != NULL)
                     {
                         __itt_api_init_t* __itt_api_init_ptr;
-                        int lib_version = __itt_lib_version(__itt_ittapi_global.lib);
+                        int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
 
                         switch (lib_version) {
                         case 0:
                             groups = __itt_group_legacy;
                         case 1:
                             /* Fill all pointers from dynamic library */
-                            for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
+                            for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
                             {
-                                if (__itt_ittapi_global.api_list_ptr[i].group & groups & init_groups)
+                                if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
                                 {
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = (void*)__itt_get_proc(__itt_ittapi_global.lib, __itt_ittapi_global.api_list_ptr[i].name);
-                                    if (*__itt_ittapi_global.api_list_ptr[i].func_ptr == NULL)
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
+                                    if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
                                     {
                                         /* Restore pointers for function with static implementation */
-                                        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
-                                        __itt_report_error(__itt_error_no_symbol, lib_name, __itt_ittapi_global.api_list_ptr[i].name);
+                                        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+                                        __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
 #ifdef ITT_COMPLETE_GROUP
-                                        zero_group = (__itt_group_id)(zero_group | __itt_ittapi_global.api_list_ptr[i].group);
+                                        zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
 #endif /* ITT_COMPLETE_GROUP */
                                     }
                                 }
                                 else
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
                             }
 
                             if (groups == __itt_group_legacy)
@@ -934,15 +881,15 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
                             }
 
 #ifdef ITT_COMPLETE_GROUP
-                            for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-                                if (__itt_ittapi_global.api_list_ptr[i].group & zero_group)
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+                            for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+                                if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
 #endif /* ITT_COMPLETE_GROUP */
                             break;
                         case 2:
-                            __itt_api_init_ptr = (__itt_api_init_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_init");
+                            __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
                             if (__itt_api_init_ptr)
-                                __itt_api_init_ptr(&__itt_ittapi_global, init_groups);
+                                __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
                             break;
                         }
                     }
@@ -963,7 +910,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
                 {
                     __itt_nullify_all_pointers();
                 }
-                __itt_ittapi_global.api_initialized = 1;
+                _N_(_ittapi_global).api_initialized = 1;
                 current_thread = 0;
                 /* !!! Just to avoid unused code elimination !!! */
                 if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
@@ -971,25 +918,26 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou
             }
 
 #ifndef ITT_SIMPLE_INIT
-        __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+        __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
 #endif /* ITT_SIMPLE_INIT */
     }
 
     /* Evaluating if any function ptr is non-empty and is in init_groups */
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        if (*__itt_ittapi_global.api_list_ptr[i].func_ptr != __itt_ittapi_global.api_list_ptr[i].null_func &&
-            __itt_ittapi_global.api_list_ptr[i].group & init_groups)
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
+            _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
            return 1;
     return 0;
 }
 
 ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
 {
-    __itt_error_handler_t* prev = (__itt_error_handler_t*)__itt_ittapi_global.error_handler;
-    __itt_ittapi_global.error_handler = (void*)handler;
+    __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+    _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
     return prev;
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
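Both `_N_(fini_ittlib)` and `_N_(init_ittlib)` above follow the double-checked shape: an unlocked test of `api_initialized` as the fast path, then the mutex, then a second test under the lock before doing the one-time work, with `current_thread` additionally guarding against re-entry from the initializing thread itself. Reduced to its core (a sketch with hypothetical names, using a plain pthread mutex rather than the runtime's wrappers):

    #include <pthread.h>

    static volatile int initialized = 0;
    static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;

    void ensure_initialized(void)
    {
        if (!initialized)                     /* cheap unlocked fast path */
        {
            pthread_mutex_lock(&init_mutex);
            if (!initialized)                 /* re-check under the lock */
            {
                do_one_time_setup();          /* hypothetical */
                initialized = 1;              /* publish only after setup completes */
            }
            pthread_mutex_unlock(&init_mutex);
        }
    }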
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
index 1e9eb43f6e0..fe1fe3c14f7 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
@@ -60,6 +60,8 @@ ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (IT
 ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
+ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
+
 #else  /* __ITT_INTERNAL_INIT */
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -78,6 +80,11 @@ ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_c
 ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p")
 ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p")
 
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p")
+ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args")
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
+ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
+
 ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p")
 ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p")
 ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p")
@@ -95,16 +102,26 @@ ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (I
 ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d")
 ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d")
 ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
 
 #ifndef __ITT_INTERNAL_BODY
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d")
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
 #ifndef __ITT_INTERNAL_BODY
@@ -123,16 +140,23 @@ ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* ad
 ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d")
 ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
 ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u")
 
 ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu")
 ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
 
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args")
+
 ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
 ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu")
 
 #ifndef __ITT_INTERNAL_BODY
 ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
 ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
 #endif /* __ITT_INTERNAL_BODY */
 
 ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p")
@@ -280,4 +304,13 @@ ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track),
 ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
 #endif /* __ITT_INTERNAL_INIT */
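ittnotify_static.h is an X-macro table: it is included several times with different definitions of `ITT_STUB`/`ITT_STUBV`, so each one-line entry added above (the `suppress_*`, `model_*_2`, `heap_record_*`, `frame_submit_v3`, and `av_save*` rows) expands in turn into a stub body, a function-pointer slot, and a row of the dispatch table that `init_ittlib` walks. The technique in miniature, with hypothetical names rather than the real macros:

    /* api_list.h would hold one row per API, e.g.:
     *   API(void, ping, (void))
     *   API(int,  add,  (int a, int b))
     */

    /* Expansion 1: declare a function-pointer slot per API. */
    #define API(ret, name, args) static ret (*name##_ptr) args;
    #include "api_list.h"
    #undef API

    /* Expansion 2: build a name -> slot table for dynamic symbol lookup. */
    typedef struct { const char *name; void *slot; } api_row_t;
    static api_row_t api_table[] = {
    #define API(ret, name, args) { #name, (void*)&name##_ptr },
    #include "api_list.h"
    #undef API
        { NULL, NULL }
    };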
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
index 27991730fdb..3695a67089b 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
@@ -29,6 +29,8 @@ typedef enum ___itt_group_id
     __itt_group_heap         = 1<<11,
     __itt_group_splitter_max = 1<<12,
     __itt_group_structure    = 1<<12,
+    __itt_group_suppress     = 1<<13,
+    __itt_group_arrays       = 1<<14,
     __itt_group_all          = -1
 } __itt_group_id;
 
@@ -57,6 +59,8 @@ typedef struct ___itt_group_list
     { __itt_group_stitch,    "stitch"    }, \
     { __itt_group_heap,      "heap"      }, \
     { __itt_group_structure, "structure" }, \
+    { __itt_group_suppress,  "suppress"  }, \
+    { __itt_group_arrays,    "arrays"    }, \
     { __itt_group_none,      NULL        }  \
 }
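`__itt_group_id` is a bit mask, which is why the new groups take the next free bits (1<<13 and 1<<14) while `__itt_group_structure` deliberately aliases `__itt_group_splitter_max`, and why `__itt_group_all` is -1, i.e. all bits set. Group filtering then reduces to masking, roughly as in this sketch (simplified names, values as in the header above):

    enum { group_suppress = 1<<13, group_arrays = 1<<14, group_all = -1 };

    static int entry_wanted(int entry_group, int init_groups)
    {
        /* the per-row test init_ittlib applies when filling its table */
        return (entry_group & init_groups) != 0;
    }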
diff --git a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
index b10676f78d7..99192945010 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
@@ -47,12 +47,18 @@
 # define ITT_PLATFORM_POSIX 2
 #endif /* ITT_PLATFORM_POSIX */
 
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
 #ifndef ITT_PLATFORM
 # if ITT_OS==ITT_OS_WIN
 #  define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+#  define ITT_PLATFORM ITT_PLATFORM_MAC
 # else
 #  define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif /* _WIN32 */
+# endif
 #endif /* ITT_PLATFORM */
 
 #if defined(_UNICODE) && !defined(UNICODE)
@@ -73,11 +79,11 @@
 # if ITT_PLATFORM==ITT_PLATFORM_WIN
 #  define CDECL __cdecl
 # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#  if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#   define CDECL /* not actual on x86_64 platform */
-#  else /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  if defined _M_IX86 || defined __i386__
 #   define CDECL __attribute__ ((cdecl))
-#  endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  else /* _M_IX86 || __i386__ */
+#   define CDECL /* actual only on x86 platform */
+#  endif /* _M_IX86 || __i386__ */
 # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* CDECL */
 
@@ -85,11 +91,11 @@
 # if ITT_PLATFORM==ITT_PLATFORM_WIN
 #  define STDCALL __stdcall
 # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#  if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#   define STDCALL /* not supported on x86_64 platform */
-#  else /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  if defined _M_IX86 || defined __i386__
 #   define STDCALL __attribute__ ((stdcall))
-#  endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#  else /* _M_IX86 || __i386__ */
+#   define STDCALL /* supported only on x86 platform */
+#  endif /* _M_IX86 || __i386__ */
 # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* STDCALL */
 
@@ -102,8 +108,8 @@
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define INLINE __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
 #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
  * Generally, functions are not inlined unless optimization is specified.
@@ -111,11 +117,11 @@
  * if no optimization level was specified.
 */
 #ifdef __STRICT_ANSI__
-#define INLINE static
+#define ITT_INLINE static
 #else /* __STRICT_ANSI__ */
-#define INLINE static inline
+#define ITT_INLINE static inline
 #endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /** @endcond */
diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s
index 1bfdc0bf8cd..1f1ba1b3884 100644
--- a/openmp/runtime/src/z_Linux_asm.s
+++ b/openmp/runtime/src/z_Linux_asm.s
@@ -1,7 +1,7 @@
 // z_Linux_asm.s:  - microtasking routines specifically
 //                   written for Intel platforms running Linux* OS
-// $Revision: 42582 $
-// $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+// $Revision: 42810 $
+// $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
 //
 
 ////===----------------------------------------------------------------------===//
@@ -77,7 +77,7 @@ KMP_PREFIX_UNDERSCORE($0):
 KMP_PREFIX_UNDERSCORE(\proc):
 .endm
 # endif // defined __APPLE__ && defined __MACH__
-#endif // __i386 || defined __x86_64
+#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
 
 // -----------------------------------------------------------------------
@@ -1573,3 +1573,19 @@ L_kmp_1_exit:
 
 // -----------------------------------------------------------------------
 
 #endif /* KMP_ARCH_X86_64 */
+
+#if KMP_ARCH_ARM
+    .data
+    .comm .gomp_critical_user_,32,8
+    .data
+    .align 4
+    .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+    .4byte .gomp_critical_user_
+    .size __kmp_unnamed_critical_addr,4
+#endif /* KMP_ARCH_ARM */
+
+
+#if defined(__linux__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 4675302a693..27e394f8797 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -1,7 +1,7 @@
 /*
  * z_Linux_util.c -- platform specific routines.
- * $Revision: 42582 $
- * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+ * $Revision: 42847 $
+ * $Date: 2013-11-26 09:10:01 -0600 (Tue, 26 Nov 2013) $
 */
 
 
@@ -32,7 +32,7 @@
 #if KMP_OS_LINUX
 # include <sys/sysinfo.h>
-# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 // We should really include <futex.h>, but that causes compatibility problems on different
 // Linux* OS distributions that either require that you include (or break when you try to include)
 // <pci/types.h>.
@@ -55,6 +55,12 @@
 #include <ctype.h>
 #include <fcntl.h>
 
+// For non-x86 architectures
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# include <stdbool.h>
+# include <ffi.h>
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
@@ -112,7 +118,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
  * stone forever.
 */
 
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
 #  ifndef __NR_sched_setaffinity
 #   define __NR_sched_setaffinity 241
 #  elif __NR_sched_setaffinity != 241
@@ -434,7 +440,7 @@ __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 int
 __kmp_futex_determine_capable()
@@ -451,7 +457,7 @@ __kmp_futex_determine_capable()
     return retval;
 }
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
@@ -2004,43 +2010,21 @@ __kmp_get_xproc( void ) {
 
 } // __kmp_get_xproc
 
-/*
-    Parse /proc/cpuinfo file for processor frequency, return frequency in Hz, or ~ 0 in case of
-    error.
-*/
-static
-kmp_uint64
-__kmp_get_frequency_from_proc(
-) {
+int
+__kmp_read_from_file( char const *path, char const *format, ... )
+{
+    int result;
+    va_list args;
 
-    kmp_uint64 result = ~ 0;
-    FILE * file = NULL;
-    double freq = HUGE_VAL;
-    int rc;
+    va_start(args, format);
+    FILE *f = fopen(path, "rb");
+    if ( f == NULL )
+        return 0;
+    result = vfscanf(f, format, args);
+    fclose(f);
 
-    //
-    // FIXME - use KMP_CPUINFO_FILE here if it is set!!!
-    //
-    file = fopen( "/proc/cpuinfo", "r" );
-    if ( file == NULL ) {
-        return result;
-    }; // if
-    for ( ; ; ) {
-        rc = fscanf( file, "cpu MHz : %lf\n", & freq );    // Try to scan frequency.
-        if ( rc == 1 ) {                                   // Success.
-            break;
-        }; // if
-        fscanf( file, "%*[^\n]\n" );                       // Failure -- skip line.
-    }; // for
-    fclose( file );
-    if ( freq == HUGE_VAL || freq <= 0 ) {
-        return result;
-    }; // if
-    result = (kmp_uint64)( freq * 1.0E+6 );
-    KA_TRACE( 5, ( "cpu frequency from /proc/cpuinfo: %" KMP_UINT64_SPEC "\n", result ) );
     return result;
-} // func __kmp_get_frequency_from_proc
-
+}
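The new `__kmp_read_from_file` generalizes the deleted /proc/cpuinfo scanner into "apply a scanf format to a file": it returns 0 when the file cannot be opened, otherwise whatever `vfscanf` returns, i.e. the number of converted fields (note the helper never calls va_end, which the C standard formally requires). A plausible call, illustrative only; the path and format here are examples, not something this patch reads:

    unsigned long khz = 0;
    if ( __kmp_read_from_file( "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                               "%lu", &khz ) == 1 ) {
        /* khz holds the parsed value; any other return means open or parse failure */
    }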
 
 void
 __kmp_runtime_initialize( void )
@@ -2059,15 +2043,6 @@ __kmp_runtime_initialize( void )
     }; // if
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-    if ( __kmp_cpu_frequency == 0 ) {
-        // First try nominal frequency.
-        __kmp_cpu_frequency = __kmp_cpuinfo.frequency;
-        if ( __kmp_cpu_frequency == 0 || __kmp_cpu_frequency == ~ 0 ) {
-            // Next try to get CPU frequency from /proc/cpuinfo.
-            __kmp_cpu_frequency = __kmp_get_frequency_from_proc();
-        }; // if
-    }; // if
-
     __kmp_xproc = __kmp_get_xproc();
 
     if ( sysconf( _SC_THREADS ) ) {
@@ -2536,5 +2511,42 @@ __kmp_get_load_balance( int max )
 
 #endif // USE_LOAD_BALANCE
 
+
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
+int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
+        void *p_argv[] )
+{
+    int argc_full = argc + 2;
+    int i;
+    ffi_cif cif;
+    ffi_type *types[argc_full];
+    void *args[argc_full];
+    void *idp[2];
+
+    /* We're only passing pointers to the target. */
+    for (i = 0; i < argc_full; i++)
+        types[i] = &ffi_type_pointer;
+
+    /* Ugly double-indirection, but that's how it goes... */
+    idp[0] = &gtid;
+    idp[1] = &tid;
+    args[0] = &idp[0];
+    args[1] = &idp[1];
+
+    for (i = 0; i < argc; i++)
+        args[2 + i] = &p_argv[i];
+
+    if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, argc_full,
+                &ffi_type_void, types) != FFI_OK)
+        abort();
+
+    ffi_call(&cif, (void (*)(void))pkfn, NULL, args);
+
+    return 1;
+}
+
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
 // end of file //
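The libffi path above serves targets that lack the hand-written assembly trampoline: `__kmp_invoke_microtask` forwards argc + 2 pointer-sized arguments, prepending the addresses of gtid and tid. For any concrete arity the `ffi_call` is equivalent to a direct call, sketched here for argc == 2 (illustrative only):

    /* What ffi_prep_cif + ffi_call amount to when argc == 2. */
    typedef void (*microtask2_t)( int *gtid, int *tid, void *arg0, void *arg1 );

    ( (microtask2_t)pkfn )( &gtid, &tid, p_argv[0], p_argv[1] );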
diff --git a/openmp/runtime/src/z_Windows_NT_util.c b/openmp/runtime/src/z_Windows_NT_util.c
index bd22c25be05..ba59110e8b8 100644
--- a/openmp/runtime/src/z_Windows_NT_util.c
+++ b/openmp/runtime/src/z_Windows_NT_util.c
@@ -1,7 +1,7 @@
 /*
  * z_Windows_NT_util.c -- platform specific routines.
- * $Revision: 42518 $
- * $Date: 2013-07-15 11:12:26 -0500 (Mon, 15 Jul 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
 */
 
 
@@ -391,14 +391,14 @@ __kmp_suspend( int th_gtid, volatile kmp_uint *spinner, kmp_uint checker )
     /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
        gets called first?
     */
-    old_spin = __kmp_test_then_or32( (volatile kmp_int32 *) spinner,
+    old_spin = KMP_TEST_THEN_OR32( (volatile kmp_int32 *) spinner,
                                      KMP_BARRIER_SLEEP_STATE );
 
     KF_TRACE( 5, ( "__kmp_suspend: T#%d set sleep bit for spin(%p)==%d\n",
                    th_gtid, spinner, *spinner ) );
 
     if ( old_spin == checker ) {
-        __kmp_test_then_and32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
+        KMP_TEST_THEN_AND32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
 
         KF_TRACE( 5, ( "__kmp_suspend: T#%d false alarm, reset sleep bit for spin(%p)\n",
                        th_gtid, spinner) );
@@ -501,7 +501,7 @@ __kmp_resume( int target_gtid, volatile kmp_uint *spin )
     }
 
     TCW_PTR(th->th.th_sleep_loc, NULL);
 
-    old_spin = __kmp_test_then_and32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
+    old_spin = KMP_TEST_THEN_AND32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
 
     if ( ( old_spin & KMP_BARRIER_SLEEP_STATE ) == 0 ) {
         KF_TRACE( 5, ( "__kmp_resume: T#%d exiting, thread T#%d already awake - spin(%p): "
@@ -874,24 +874,6 @@ __kmp_runtime_initialize( void )
     }; // if
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-    if ( __kmp_cpu_frequency == 0 ) {
-        // __kmp_hardware_timestamp() calls to QueryPerformanceCounter(). If
-        // __kmp_hardware_timestamp() rewritten to use RDTSC instruction (or its 64 analog),
-        // probably we should try to get frequency from __kmp_cpuinfo.frequency first (see
-        // z_Linux_util.c).
-        LARGE_INTEGER freq;
-        BOOL rc;
-        rc = QueryPerformanceFrequency( & freq );
-        if ( rc ) {
-            KMP_DEBUG_ASSERT( sizeof( __kmp_cpu_frequency ) >= sizeof( freq.QuadPart ) );
-            KMP_DEBUG_ASSERT( freq.QuadPart >= 0 );
-            __kmp_cpu_frequency = freq.QuadPart;
-            KA_TRACE( 5, ( "cpu frequency: %" KMP_UINT64_SPEC "\n", __kmp_cpu_frequency ) );
-        } else {
-            __kmp_cpu_frequency = ~ 0;
-        }; // if
-    }; // if
-
     /* Set up minimum number of threads to switch to TLS gtid */
 #if KMP_OS_WINDOWS && ! defined GUIDEDLL_EXPORTS
     // Windows* OS, static library.