diff options
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- | openmp/runtime/src/CMakeLists.txt | 62 | ||||
-rw-r--r-- | openmp/runtime/src/kmp.h | 14 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_csupport.c | 13 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_ftn_os.h | 2 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_global.c | 4 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_gsupport.c | 2 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_lock.h | 2 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_os.h | 25 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_runtime.c | 2 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_settings.c | 3 | ||||
-rw-r--r-- | openmp/runtime/src/makefile.mk | 29 | ||||
-rw-r--r-- | openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h | 9 | ||||
-rw-r--r-- | openmp/runtime/src/z_Linux_asm.s | 12 | ||||
-rw-r--r-- | openmp/runtime/src/z_Linux_util.c | 121 |
14 files changed, 259 insertions, 41 deletions
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index dd29966db7d..bfc8540bbb0 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -22,11 +22,17 @@ else() message(FATAL_ERROR "Unsupported OS") endif() -if("${ARCH}" STREQUAL "") +if(arch) + set(ARCH ${arch}) #acquire from command line +else() #assume default set(ARCH "32e") endif() -set(ARCH_STR "Intel(R) 64") +if("${arch}" STREQUAL "32e") + set(ARCH_STR "Intel(R) 64") +elseif("${arch}" STREQUAL "ppc64") + set(ARCH_STR "PPC64") +endif() set(FEATURE_FLAGS "-D USE_ITT_BUILD") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D NDEBUG") @@ -36,13 +42,22 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D _REENTRANT") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ASSERT") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_I8") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_TV") + if(APPLE) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -current_version 5.0") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -compatibility_version 5.0") endif() + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_LIBRARY_FILE=\\\"libiomp5.${OMP_SHLIBEXT}\\\"") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_VERSION_MAJOR=${VERSION}") -set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=64") + +# customize cache line size for ppc64 to 128 bytes: 64 in all other cases +if("${arch}" STREQUAL "ppc64") + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=128") +else() + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=64") +endif() + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ADJUST_BLOCKTIME=1") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_PARALLEL_ORDERED") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ASM_INTRINS") @@ -50,8 +65,13 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_LOAD_BALANCE") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_CBLKDATA") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT") -set(FEATURE_FLAGS "${FEATURE_FLAGS} 
-D KMP_USE_ADAPTIVE_LOCKS=1") -set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0") + +#adaptive locks use x86 assembly - disable for ppc64 +if("${arch}" STREQUAL "32e" OR "${arch}" STREQUAL "32" OR "${arch}" STREQUAL "arm") + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1") + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0") +endif() + set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_50_ENABLED=0") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_41_ENABLED=0") set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_40_ENABLED=1") @@ -122,16 +142,38 @@ add_custom_command( OUTPUT omp.h COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/../tools/expand-vars.pl --strict -D Revision=\"\\$$Revision\" -D Date=\"\\$$Date\" -D KMP_TYPE=\"Performance\" -D KMP_ARCH=\"\\\"${ARCH_STR}\\\"\" -D KMP_VERSION_MAJOR=${VERSION} -D KMP_VERSION_MINOR=0 -D KMP_VERSION_BUILD=00000000 -D KMP_BUILD_DATE=\"${BUILD_TIME} UTC\" -D KMP_TARGET_COMPILER=12 -D KMP_DIAG=0 -D KMP_DEBUG_INFO=0 -D OMP_VERSION=${OMP_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/include/${OMP_VERSION_NUM}/omp.h.var omp.h ) -add_custom_command( - OUTPUT z_Linux_asm.o - COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES} -) + +if("${ARCH}" STREQUAL "ppc64") + add_custom_command( + OUTPUT z_Linux_asm.o + COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES} + ) +else() + add_custom_command( + OUTPUT z_Linux_asm.o + COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES} + ) +endif() + add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc kmp_i18n_default.inc omp.h z_Linux_asm.o) if(NOT APPLE) - set(CMAKE_SHARED_LINKER_FLAGS 
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt") + if(${ARCH} STREQUAL "ppc64" AND ${OS_GEN} STREQUAL "lin") + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt -ldl") + else() + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt") + endif() endif() add_library(iomp5 SHARED ${SOURCES} z_Linux_asm.o) + +# This is a workaround to a known ppc64 issue about libpthread. For more +# information see +# http://ryanarn.blogspot.com/2011/07/curious-case-of-pthreadatfork-on.html +if("${ARCH}" STREQUAL "ppc64") + find_library(PTHREAD NAMES pthread) + target_link_libraries(iomp5 ${PTHREAD}) +endif() + add_dependencies(iomp5 gen_kmp_i18n) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a8c600b9c86..f5dd10f8baa 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -459,9 +459,9 @@ typedef int PACKED_REDUCTION_METHOD_T; /* * Only Linux* OS and Windows* OS support thread affinity. */ -#if KMP_OS_LINUX || KMP_OS_WINDOWS +#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64 # define KMP_AFFINITY_SUPPORTED 1 -#elif KMP_OS_DARWIN || KMP_OS_FREEBSD +#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64 // affinity not supported # define KMP_AFFINITY_SUPPORTED 0 #else @@ -476,7 +476,7 @@ extern size_t __kmp_affin_mask_size; # if KMP_OS_LINUX // -// On Linux* OS, the mask isactually a vector of length __kmp_affin_mask_size +// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size // (in bytes). It should be allocated on a word boundary. // // WARNING!!! 
We have made the base type of the affinity mask unsigned char, @@ -946,6 +946,9 @@ extern unsigned int __kmp_place_core_offset; #if KMP_OS_WINDOWS # define KMP_INIT_WAIT 64U /* initial number of spin-tests */ # define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ +#elif KMP_OS_CNK +# define KMP_INIT_WAIT 16U /* initial number of spin-tests */ +# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ #elif KMP_OS_LINUX # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ # define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ @@ -971,6 +974,11 @@ extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); extern void __kmp_x86_pause( void ); # endif # define KMP_CPU_PAUSE() __kmp_x86_pause() +#elif KMP_ARCH_PPC64 +# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1") +# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2") +# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory") +# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0) #else # define KMP_CPU_PAUSE() /* nothing to do */ #endif diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c index 06f2819db87..780bd41cb78 100644 --- a/openmp/runtime/src/kmp_csupport.c +++ b/openmp/runtime/src/kmp_csupport.c @@ -837,6 +837,19 @@ __kmpc_flush(ident_t *loc, ...) #endif // KMP_MIC #elif KMP_ARCH_ARM // Nothing yet + #elif KMP_ARCH_PPC64 + // Nothing needed here (we have a real MB above). + #if KMP_OS_CNK + // The flushing thread needs to yield here; this prevents a + // busy-waiting thread from saturating the pipeline. flush is + // often used in loops like this: + // while (!flag) { + // #pragma omp flush(flag) + // } + // and adding the yield here is good for at least a 10x speedup + // when running >2 threads per core (on the NAS LU benchmark). 
+ __kmp_yield(TRUE); + #endif #else #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index f241751c7e9..d78d846d136 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -478,7 +478,7 @@ //#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update #define KMP_API_NAME_GOMP_TEAMS GOMP_teams -#if KMP_OS_LINUX +#if KMP_OS_LINUX && !KMP_OS_CNK && !KMP_ARCH_PPC64 #define xstr(x) str(x) #define str(x) #x diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c index aa1f8e3fad3..d3c31952d0f 100644 --- a/openmp/runtime/src/kmp_global.c +++ b/openmp/runtime/src/kmp_global.c @@ -321,7 +321,11 @@ int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK speci kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; kmp_uint32 __kmp_yielding_on = 1; +#if KMP_OS_CNK +kmp_uint32 __kmp_yield_cycle = 0; +#else kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ +#endif kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */ kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. 
*/ /* ----------------------------------------------------- */ diff --git a/openmp/runtime/src/kmp_gsupport.c b/openmp/runtime/src/kmp_gsupport.c index 9d8e5530cd3..aa520249674 100644 --- a/openmp/runtime/src/kmp_gsupport.c +++ b/openmp/runtime/src/kmp_gsupport.c @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// -#if defined(__x86_64) +#if defined(__x86_64) || defined (__powerpc64__) # define KMP_I8 #endif #include "kmp.h" diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index 8009d18cb5a..c5ce83823c0 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -518,7 +518,7 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) // Internal RTL locks are also implemented as ticket locks, for now. // // FIXME - We should go through and figure out which lock kind works best for -// each internal lock, and use the type deeclaration and function calls for +// each internal lock, and use the type declaration and function calls for // that explicit lock kind (and get rid of this section). // diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index a71055339a4..489f8f7a323 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -70,10 +70,12 @@ #define KMP_OS_FREEBSD 0 #define KMP_OS_DARWIN 0 #define KMP_OS_WINDOWS 0 +#define KMP_OS_CNK 0 #define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. 
*/ #define KMP_ARCH_X86 0 #define KMP_ARCH_X86_64 0 +#define KMP_ARCH_PPC64 0 #ifdef _WIN32 # undef KMP_OS_WINDOWS @@ -85,9 +87,14 @@ # define KMP_OS_DARWIN 1 #endif +// in some ppc64 linux installations, only the second condition is met #if ( defined __linux ) # undef KMP_OS_LINUX # define KMP_OS_LINUX 1 +#elif ( defined __linux__) +# undef KMP_OS_LINUX +# define KMP_OS_LINUX 1 +#else #endif #if ( defined __FreeBSD__ ) @@ -95,6 +102,11 @@ # define KMP_OS_FREEBSD 1 #endif +#if ( defined __bgq__ ) +# undef KMP_OS_CNK +# define KMP_OS_CNK 1 +#endif + #if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS) # error Unknown OS #endif @@ -121,6 +133,9 @@ # elif defined __i386 # undef KMP_ARCH_X86 # define KMP_ARCH_X86 1 +# elif defined __powerpc64__ +# undef KMP_ARCH_PPC64 +# define KMP_ARCH_PPC64 1 # endif #endif @@ -160,7 +175,7 @@ # define KMP_ARCH_ARM 1 #endif -#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM) +#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64) # error Unknown or unsupported architecture #endif @@ -238,7 +253,7 @@ #if KMP_ARCH_X86 || KMP_ARCH_ARM # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC -#elif KMP_ARCH_X86_64 +#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else # error "Can't determine size_t printf format specifier." @@ -663,6 +678,10 @@ extern kmp_real64 __kmp_test_then_add_real64 ( volatile kmp_real64 *p, kmp_real6 # endif #endif /* KMP_OS_WINDOWS */ +#if KMP_ARCH_PPC64 +# define KMP_MB() __sync_synchronize() +#endif + #ifndef KMP_MB # define KMP_MB() /* nothing to do */ #endif @@ -769,7 +788,7 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... 
); #endif /* KMP_I8 */ /* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */ -#if KMP_ARCH_X86_64 && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX # define STATIC_EFI2_WORKAROUND #else # define STATIC_EFI2_WORKAROUND static diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c index fea41d0d9ae..d243700079b 100644 --- a/openmp/runtime/src/kmp_runtime.c +++ b/openmp/runtime/src/kmp_runtime.c @@ -8450,7 +8450,7 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - #if KMP_ARCH_X86_64 + #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN #if KMP_MIC diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c index 54745cb8162..b85678e28fc 100644 --- a/openmp/runtime/src/kmp_settings.c +++ b/openmp/runtime/src/kmp_settings.c @@ -536,6 +536,7 @@ __kmp_stg_parse_file( static char * par_range_to_print = NULL; +#ifdef KMP_DEBUG static void __kmp_stg_parse_par_range( char const * name, @@ -614,7 +615,7 @@ __kmp_stg_parse_par_range( break; } } // __kmp_stg_parse_par_range - +#endif int __kmp_initial_threads_capacity( int req_nproc ) diff --git a/openmp/runtime/src/makefile.mk b/openmp/runtime/src/makefile.mk index 2fa2e03bc04..b4ed8e58067 100644 --- a/openmp/runtime/src/makefile.mk +++ b/openmp/runtime/src/makefile.mk @@ -313,6 +313,9 @@ endif ifeq "$(CPLUSPLUS)" "on" ifeq "$(os)" "win" c-flags += -TP + else ifeq "$(arch)" "ppc64" + # c++0x on ppc64 linux removes definition of preproc. 
macros, needed in .hs + c-flags += -x c++ -std=gnu++0x else ifneq "$(filter gcc clang,$(c))" "" c-flags += -x c++ -std=c++0x @@ -373,7 +376,7 @@ ifeq "$(os)" "lin" ld-flags-extra += -lirc_pic endif endif - ifeq "$(filter 32 32e 64,$(arch))" "" + ifeq "$(filter 32 32e 64 ppc64,$(arch))" "" ld-flags-extra += $(shell pkg-config --libs libffi) endif else @@ -469,7 +472,14 @@ ifneq "$(os)" "win" endif cpp-flags += -D KMP_LIBRARY_FILE=\"$(lib_file)\" cpp-flags += -D KMP_VERSION_MAJOR=$(VERSION) -cpp-flags += -D CACHE_LINE=64 + +# customize ppc64 cache line size to 128, 64 otherwise +ifeq "$(arch)" "ppc64" + cpp-flags += -D CACHE_LINE=128 +else + cpp-flags += -D CACHE_LINE=64 +endif + cpp-flags += -D KMP_ADJUST_BLOCKTIME=1 cpp-flags += -D BUILD_PARALLEL_ORDERED cpp-flags += -D KMP_ASM_INTRINS @@ -584,9 +594,12 @@ ifneq "$(os)" "win" ifeq "$(arch)" "arm" z_Linux_asm$(obj) : \ cpp-flags += -D KMP_ARCH_ARM - else + else ifeq "$(arch)" "ppc64" z_Linux_asm$(obj) : \ - cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64) + cpp-flags += -D KMP_ARCH_PPC64 + else + z_Linux_asm$(obj) : \ + cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64) endif endif @@ -735,7 +748,9 @@ endif else # 5 lib_c_items += kmp_gsupport endif +# ifneq "$(arch)" "ppc64" lib_asm_items += z_Linux_asm +# endif endif endif @@ -1397,9 +1412,13 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" "" td_exp += libc.so.6 td_exp += ld-linux-armhf.so.3 endif + ifeq "$(arch)" "ppc64" + td_exp += libc.so.6 + td_exp += ld64.so.1 + endif td_exp += libdl.so.2 td_exp += libgcc_s.so.1 - ifeq "$(filter 32 32e 64,$(arch))" "" + ifeq "$(filter 32 32e 64 ppc64,$(arch))" "" td_exp += libffi.so.6 td_exp += libffi.so.5 endif diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h index 40c8614d222..9e7b36b5890 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ 
b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -132,6 +132,11 @@ # define ITT_ARCH_ARM 4 #endif /* ITT_ARCH_ARM */ +#ifndef ITT_ARCH_PPC64 +# define ITT_ARCH_PPC64 5 +#endif /* ITT_ARCH_PPC64 */ + + #ifndef ITT_ARCH # if defined _M_IX86 || defined __i386__ # define ITT_ARCH ITT_ARCH_IA32 @@ -141,6 +146,8 @@ # define ITT_ARCH ITT_ARCH_IA64 # elif defined _M_ARM || __arm__ # define ITT_ARCH ITT_ARCH_ARM +# elif defined __powerpc64__ +# define ITT_ARCH ITT_ARCH_PPC64 # endif #endif @@ -274,7 +281,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) : "memory"); return result; } -#elif ITT_ARCH==ITT_ARCH_ARM +#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) #endif /* ITT_ARCH==ITT_ARCH_IA64 */ #ifndef ITT_SIMPLE_INIT diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s index 1f1ba1b3884..64c80522614 100644 --- a/openmp/runtime/src/z_Linux_asm.s +++ b/openmp/runtime/src/z_Linux_asm.s @@ -138,7 +138,7 @@ __kmp_unnamed_critical_addr: #endif /* KMP_GOMP_COMPAT */ -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 // ----------------------------------------------------------------------- // microtasking routines specifically written for IA-32 architecture @@ -1585,6 +1585,16 @@ __kmp_unnamed_critical_addr: .size __kmp_unnamed_critical_addr,4 #endif /* KMP_ARCH_ARM */ +#if KMP_ARCH_PPC64 + .data + .comm .gomp_critical_user_,32,8 + .data + .align 8 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .8byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr,8 +#endif /* KMP_ARCH_PPC64 */ #if defined(__linux__) .section .note.GNU-stack,"",@progbits diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c index 7633f990a73..348f5d88fee 100644 --- a/openmp/runtime/src/z_Linux_util.c +++ b/openmp/runtime/src/z_Linux_util.c @@ -32,7 +32,7 @@ #include <sys/resource.h> #include 
<sys/syscall.h> -#if KMP_OS_LINUX +#if KMP_OS_LINUX && !KMP_OS_CNK # include <sys/sysinfo.h> # if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) // We should really include <futex.h>, but that causes compatibility problems on different @@ -61,7 +61,7 @@ #include <fcntl.h> // For non-x86 architecture -#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64) # include <stdbool.h> # include <ffi.h> #endif @@ -110,7 +110,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond ) /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) /* * Affinity support @@ -147,6 +147,19 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond ) # error Wrong code for getaffinity system call. # endif /* __NR_sched_getaffinity */ +# elif KMP_ARCH_PPC64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 222 +# elif __NR_sched_setaffinity != 222 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 223 +# elif __NR_sched_getaffinity != 223 +# error Wrong code for getaffinity system call. 
+# endif /* __NR_sched_getaffinity */ + + # else # error Unknown or unsupported architecture @@ -445,7 +458,7 @@ __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask, /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK int __kmp_futex_determine_capable() @@ -462,7 +475,7 @@ __kmp_futex_determine_capable() return retval; } -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ @@ -481,7 +494,7 @@ __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) old_value = TCR_4( *p ); new_value = old_value | d; - while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) + while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) { KMP_CPU_PAUSE(); old_value = TCR_4( *p ); @@ -498,7 +511,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) old_value = TCR_4( *p ); new_value = old_value & d; - while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) + while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) { KMP_CPU_PAUSE(); old_value = TCR_4( *p ); @@ -507,7 +520,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) return old_value; } -# if KMP_ARCH_X86 +# if KMP_ARCH_X86 || KMP_ARCH_PPC64 kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) { @@ -516,7 +529,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) old_value = TCR_8( *p ); new_value = old_value + d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + while ( ! 
KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) { KMP_CPU_PAUSE(); old_value = TCR_8( *p ); @@ -533,7 +546,7 @@ __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) old_value = TCR_8( *p ); new_value = old_value | d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) { KMP_CPU_PAUSE(); old_value = TCR_8( *p ); @@ -549,7 +562,7 @@ __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) old_value = TCR_8( *p ); new_value = old_value & d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) + while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) { KMP_CPU_PAUSE(); old_value = TCR_8( *p ); @@ -2527,7 +2540,7 @@ __kmp_get_load_balance( int max ) #endif // USE_LOAD_BALANCE -#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64) int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc, void *p_argv[] ) @@ -2561,7 +2574,89 @@ int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc, return 1; } -#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64) +#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64) + +#if KMP_ARCH_PPC64 + +// we really only need the case with 1 argument, because CLANG always build +// a struct of pointers to shared variables referenced in the outlined function +int +__kmp_invoke_microtask( microtask_t pkfn, + int gtid, int tid, + int argc, void *p_argv[] ) { + switch (argc) { + default: + fprintf(stderr, "Too many args to microtask: %d!\n", argc); + fflush(stderr); + exit(-1); + case 0: + (*pkfn)(&gtid, &tid); + break; + case 1: + (*pkfn)(&gtid, &tid, p_argv[0]); + break; + case 2: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); + break; + case 3: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); + break; + case 4: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], 
p_argv[3]); + break; + case 5: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); + break; + case 6: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5]); + break; + case 7: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6]); + break; + case 8: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7]); + break; + case 9: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8]); + break; + case 10: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]); + break; + case 11: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]); + break; + case 12: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11]); + break; + case 13: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12]); + break; + case 14: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13]); + break; + case 15: + (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14]); + break; + } + + return 1; +} + +#endif // end of file // |