summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r--openmp/runtime/src/CMakeLists.txt62
-rw-r--r--openmp/runtime/src/kmp.h14
-rw-r--r--openmp/runtime/src/kmp_csupport.c13
-rw-r--r--openmp/runtime/src/kmp_ftn_os.h2
-rw-r--r--openmp/runtime/src/kmp_global.c4
-rw-r--r--openmp/runtime/src/kmp_gsupport.c2
-rw-r--r--openmp/runtime/src/kmp_lock.h2
-rw-r--r--openmp/runtime/src/kmp_os.h25
-rw-r--r--openmp/runtime/src/kmp_runtime.c2
-rw-r--r--openmp/runtime/src/kmp_settings.c3
-rw-r--r--openmp/runtime/src/makefile.mk29
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h9
-rw-r--r--openmp/runtime/src/z_Linux_asm.s12
-rw-r--r--openmp/runtime/src/z_Linux_util.c121
14 files changed, 259 insertions, 41 deletions
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index dd29966db7d..bfc8540bbb0 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -22,11 +22,17 @@ else()
message(FATAL_ERROR "Unsupported OS")
endif()
-if("${ARCH}" STREQUAL "")
+if(arch)
+ set(ARCH ${arch}) #acquire from command line
+else() #assume default
set(ARCH "32e")
endif()
-set(ARCH_STR "Intel(R) 64")
+if("${ARCH}" STREQUAL "32e") #test the resolved ARCH (not raw arch) so the default build gets a label too
+ set(ARCH_STR "Intel(R) 64")
+elseif("${ARCH}" STREQUAL "ppc64")
+ set(ARCH_STR "PPC64") #ARCH_STR is expanded into KMP_ARCH when omp.h is generated
+endif()
set(FEATURE_FLAGS "-D USE_ITT_BUILD")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D NDEBUG")
@@ -36,13 +42,22 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D _REENTRANT")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ASSERT")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_I8")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_TV")
+
if(APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -current_version 5.0")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -compatibility_version 5.0")
endif()
+
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_LIBRARY_FILE=\\\"libiomp5.${OMP_SHLIBEXT}\\\"")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_VERSION_MAJOR=${VERSION}")
-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=64")
+
+# customize cache line size for ppc64 to 128 bytes: 64 in all other cases (keyed on the resolved ARCH)
+if("${ARCH}" STREQUAL "ppc64")
+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=128")
+else()
+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D CACHE_LINE=64")
+endif()
+
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ADJUST_BLOCKTIME=1")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D BUILD_PARALLEL_ORDERED")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ASM_INTRINS")
@@ -50,8 +65,13 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_LOAD_BALANCE")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_CBLKDATA")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT")
-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
+
+#adaptive locks use x86 assembly - disable for ppc64; test the resolved ARCH so a default (32e) build keeps them
+if("${ARCH}" STREQUAL "32e" OR "${ARCH}" STREQUAL "32" OR "${ARCH}" STREQUAL "arm")
+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
+endif()
+
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_50_ENABLED=0")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_41_ENABLED=0")
set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_40_ENABLED=1")
@@ -122,16 +142,38 @@ add_custom_command(
OUTPUT omp.h
COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/../tools/expand-vars.pl --strict -D Revision=\"\\$$Revision\" -D Date=\"\\$$Date\" -D KMP_TYPE=\"Performance\" -D KMP_ARCH=\"\\\"${ARCH_STR}\\\"\" -D KMP_VERSION_MAJOR=${VERSION} -D KMP_VERSION_MINOR=0 -D KMP_VERSION_BUILD=00000000 -D KMP_BUILD_DATE=\"${BUILD_TIME} UTC\" -D KMP_TARGET_COMPILER=12 -D KMP_DIAG=0 -D KMP_DEBUG_INFO=0 -D OMP_VERSION=${OMP_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/include/${OMP_VERSION_NUM}/omp.h.var omp.h
)
-add_custom_command(
- OUTPUT z_Linux_asm.o
- COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
-)
+
+if("${ARCH}" STREQUAL "ppc64")
+ add_custom_command(
+ OUTPUT z_Linux_asm.o
+ COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
+ )
+else()
+ add_custom_command(
+ OUTPUT z_Linux_asm.o
+ COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
+ )
+endif()
+
add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc kmp_i18n_default.inc omp.h z_Linux_asm.o)
if(NOT APPLE)
- set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt")
+ if("${ARCH}" STREQUAL "ppc64" AND "${OS_GEN}" STREQUAL "lin") # quoted: an empty or variable-named value must not break the test
+ set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt -ldl") # ppc64 Linux additionally links libdl
+ else()
+ set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt")
+ endif()
endif()
add_library(iomp5 SHARED ${SOURCES} z_Linux_asm.o)
+
+# This is a workaround to a known ppc64 issue about libpthread. For more
+# information see
+# http://ryanarn.blogspot.com/2011/07/curious-case-of-pthreadatfork-on.html
+if("${ARCH}" STREQUAL "ppc64")
+ find_library(PTHREAD NAMES pthread)
+ target_link_libraries(iomp5 ${PTHREAD})
+endif()
+
add_dependencies(iomp5 gen_kmp_i18n)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index a8c600b9c86..f5dd10f8baa 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -459,9 +459,9 @@ typedef int PACKED_REDUCTION_METHOD_T;
/*
* Only Linux* OS and Windows* OS support thread affinity.
*/
-#if KMP_OS_LINUX || KMP_OS_WINDOWS
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
# define KMP_AFFINITY_SUPPORTED 1
-#elif KMP_OS_DARWIN || KMP_OS_FREEBSD
+#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64
// affinity not supported
# define KMP_AFFINITY_SUPPORTED 0
#else
@@ -476,7 +476,7 @@ extern size_t __kmp_affin_mask_size;
# if KMP_OS_LINUX
//
-// On Linux* OS, the mask isactually a vector of length __kmp_affin_mask_size
+// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
// (in bytes). It should be allocated on a word boundary.
//
// WARNING!!! We have made the base type of the affinity mask unsigned char,
@@ -946,6 +946,9 @@ extern unsigned int __kmp_place_core_offset;
#if KMP_OS_WINDOWS
# define KMP_INIT_WAIT 64U /* initial number of spin-tests */
# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */
+#elif KMP_OS_CNK
+# define KMP_INIT_WAIT 16U /* initial number of spin-tests */
+# define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
@@ -971,6 +974,11 @@ extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
extern void __kmp_x86_pause( void );
# endif
# define KMP_CPU_PAUSE() __kmp_x86_pause()
+#elif KMP_ARCH_PPC64
+# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
+# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
+# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
+# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
#else
# define KMP_CPU_PAUSE() /* nothing to do */
#endif
diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c
index 06f2819db87..780bd41cb78 100644
--- a/openmp/runtime/src/kmp_csupport.c
+++ b/openmp/runtime/src/kmp_csupport.c
@@ -837,6 +837,19 @@ __kmpc_flush(ident_t *loc, ...)
#endif // KMP_MIC
#elif KMP_ARCH_ARM
// Nothing yet
+ #elif KMP_ARCH_PPC64
+ // Nothing needed here (we have a real MB above).
+ #if KMP_OS_CNK
+ // The flushing thread needs to yield here; this prevents a
+ // busy-waiting thread from saturating the pipeline. flush is
+ // often used in loops like this:
+ // while (!flag) {
+ // #pragma omp flush(flag)
+ // }
+ // and adding the yield here is good for at least a 10x speedup
+ // when running >2 threads per core (on the NAS LU benchmark).
+ __kmp_yield(TRUE);
+ #endif
#else
#error Unknown or unsupported architecture
#endif
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index f241751c7e9..d78d846d136 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -478,7 +478,7 @@
//#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update
#define KMP_API_NAME_GOMP_TEAMS GOMP_teams
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK && !KMP_ARCH_PPC64
#define xstr(x) str(x)
#define str(x) #x
diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c
index aa1f8e3fad3..d3c31952d0f 100644
--- a/openmp/runtime/src/kmp_global.c
+++ b/openmp/runtime/src/kmp_global.c
@@ -321,7 +321,11 @@ int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK speci
kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
kmp_uint32 __kmp_yielding_on = 1;
+#if KMP_OS_CNK
+kmp_uint32 __kmp_yield_cycle = 0;
+#else
kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
+#endif
kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */
kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. */
/* ----------------------------------------------------- */
diff --git a/openmp/runtime/src/kmp_gsupport.c b/openmp/runtime/src/kmp_gsupport.c
index 9d8e5530cd3..aa520249674 100644
--- a/openmp/runtime/src/kmp_gsupport.c
+++ b/openmp/runtime/src/kmp_gsupport.c
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
-#if defined(__x86_64)
+#if defined(__x86_64) || defined (__powerpc64__)
# define KMP_I8
#endif
#include "kmp.h"
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 8009d18cb5a..c5ce83823c0 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -518,7 +518,7 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
// Internal RTL locks are also implemented as ticket locks, for now.
//
// FIXME - We should go through and figure out which lock kind works best for
-// each internal lock, and use the type deeclaration and function calls for
+// each internal lock, and use the type declaration and function calls for
// that explicit lock kind (and get rid of this section).
//
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index a71055339a4..489f8f7a323 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -70,10 +70,12 @@
#define KMP_OS_FREEBSD 0
#define KMP_OS_DARWIN 0
#define KMP_OS_WINDOWS 0
+#define KMP_OS_CNK 0
#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */
#define KMP_ARCH_X86 0
#define KMP_ARCH_X86_64 0
+#define KMP_ARCH_PPC64 0
#ifdef _WIN32
# undef KMP_OS_WINDOWS
@@ -85,9 +87,14 @@
# define KMP_OS_DARWIN 1
#endif
+// in some ppc64 linux installations, only the second condition is met
#if ( defined __linux )
# undef KMP_OS_LINUX
# define KMP_OS_LINUX 1
+#elif ( defined __linux__)
+# undef KMP_OS_LINUX
+# define KMP_OS_LINUX 1
+#else
#endif
#if ( defined __FreeBSD__ )
@@ -95,6 +102,11 @@
# define KMP_OS_FREEBSD 1
#endif
+#if ( defined __bgq__ )
+# undef KMP_OS_CNK
+# define KMP_OS_CNK 1
+#endif
+
#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS)
# error Unknown OS
#endif
@@ -121,6 +133,9 @@
# elif defined __i386
# undef KMP_ARCH_X86
# define KMP_ARCH_X86 1
+# elif defined __powerpc64__
+# undef KMP_ARCH_PPC64
+# define KMP_ARCH_PPC64 1
# endif
#endif
@@ -160,7 +175,7 @@
# define KMP_ARCH_ARM 1
#endif
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64)
# error Unknown or unsupported architecture
#endif
@@ -238,7 +253,7 @@
#if KMP_ARCH_X86 || KMP_ARCH_ARM
# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64
# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
#else
# error "Can't determine size_t printf format specifier."
@@ -663,6 +678,10 @@ extern kmp_real64 __kmp_test_then_add_real64 ( volatile kmp_real64 *p, kmp_real6
# endif
#endif /* KMP_OS_WINDOWS */
+#if KMP_ARCH_PPC64
+# define KMP_MB() __sync_synchronize()
+#endif
+
#ifndef KMP_MB
# define KMP_MB() /* nothing to do */
#endif
@@ -769,7 +788,7 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... );
#endif /* KMP_I8 */
/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
# define STATIC_EFI2_WORKAROUND
#else
# define STATIC_EFI2_WORKAROUND static
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index fea41d0d9ae..d243700079b 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -8450,7 +8450,7 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
- #if KMP_ARCH_X86_64
+ #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
#if KMP_MIC
diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c
index 54745cb8162..b85678e28fc 100644
--- a/openmp/runtime/src/kmp_settings.c
+++ b/openmp/runtime/src/kmp_settings.c
@@ -536,6 +536,7 @@ __kmp_stg_parse_file(
static char * par_range_to_print = NULL;
+#ifdef KMP_DEBUG
static void
__kmp_stg_parse_par_range(
char const * name,
@@ -614,7 +615,7 @@ __kmp_stg_parse_par_range(
break;
}
} // __kmp_stg_parse_par_range
-
+#endif
int
__kmp_initial_threads_capacity( int req_nproc )
diff --git a/openmp/runtime/src/makefile.mk b/openmp/runtime/src/makefile.mk
index 2fa2e03bc04..b4ed8e58067 100644
--- a/openmp/runtime/src/makefile.mk
+++ b/openmp/runtime/src/makefile.mk
@@ -313,6 +313,9 @@ endif
ifeq "$(CPLUSPLUS)" "on"
ifeq "$(os)" "win"
c-flags += -TP
+ else ifeq "$(arch)" "ppc64"
+ # c++0x on ppc64 linux removes definition of preproc. macros, needed in .hs
+ c-flags += -x c++ -std=gnu++0x
else
ifneq "$(filter gcc clang,$(c))" ""
c-flags += -x c++ -std=c++0x
@@ -373,7 +376,7 @@ ifeq "$(os)" "lin"
ld-flags-extra += -lirc_pic
endif
endif
- ifeq "$(filter 32 32e 64,$(arch))" ""
+ ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
ld-flags-extra += $(shell pkg-config --libs libffi)
endif
else
@@ -469,7 +472,14 @@ ifneq "$(os)" "win"
endif
cpp-flags += -D KMP_LIBRARY_FILE=\"$(lib_file)\"
cpp-flags += -D KMP_VERSION_MAJOR=$(VERSION)
-cpp-flags += -D CACHE_LINE=64
+
+# customize ppc64 cache line size to 128, 64 otherwise
+ifeq "$(arch)" "ppc64"
+ cpp-flags += -D CACHE_LINE=128
+else
+ cpp-flags += -D CACHE_LINE=64
+endif
+
cpp-flags += -D KMP_ADJUST_BLOCKTIME=1
cpp-flags += -D BUILD_PARALLEL_ORDERED
cpp-flags += -D KMP_ASM_INTRINS
@@ -584,9 +594,12 @@ ifneq "$(os)" "win"
ifeq "$(arch)" "arm"
z_Linux_asm$(obj) : \
cpp-flags += -D KMP_ARCH_ARM
- else
+ else ifeq "$(arch)" "ppc64"
z_Linux_asm$(obj) : \
- cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+ cpp-flags += -D KMP_ARCH_PPC64
+ else
+ z_Linux_asm$(obj) : \
+ cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
endif
endif
@@ -735,7 +748,9 @@ endif
else # 5
lib_c_items += kmp_gsupport
endif
+# ifneq "$(arch)" "ppc64"
lib_asm_items += z_Linux_asm
+# endif
endif
endif
@@ -1397,9 +1412,13 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" ""
td_exp += libc.so.6
td_exp += ld-linux-armhf.so.3
endif
+ ifeq "$(arch)" "ppc64"
+ td_exp += libc.so.6
+ td_exp += ld64.so.1
+ endif
td_exp += libdl.so.2
td_exp += libgcc_s.so.1
- ifeq "$(filter 32 32e 64,$(arch))" ""
+ ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
td_exp += libffi.so.6
td_exp += libffi.so.5
endif
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index 40c8614d222..9e7b36b5890 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -132,6 +132,11 @@
# define ITT_ARCH_ARM 4
#endif /* ITT_ARCH_ARM */
+#ifndef ITT_ARCH_PPC64
+# define ITT_ARCH_PPC64 5
+#endif /* ITT_ARCH_PPC64 */
+
+
#ifndef ITT_ARCH
# if defined _M_IX86 || defined __i386__
# define ITT_ARCH ITT_ARCH_IA32
@@ -141,6 +146,8 @@
# define ITT_ARCH ITT_ARCH_IA64
# elif defined _M_ARM || __arm__
# define ITT_ARCH ITT_ARCH_ARM
+# elif defined __powerpc64__
+# define ITT_ARCH ITT_ARCH_PPC64
# endif
#endif
@@ -274,7 +281,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
: "memory");
return result;
}
-#elif ITT_ARCH==ITT_ARCH_ARM
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s
index 1f1ba1b3884..64c80522614 100644
--- a/openmp/runtime/src/z_Linux_asm.s
+++ b/openmp/runtime/src/z_Linux_asm.s
@@ -138,7 +138,7 @@ __kmp_unnamed_critical_addr:
#endif /* KMP_GOMP_COMPAT */
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
@@ -1585,6 +1585,16 @@ __kmp_unnamed_critical_addr:
.size __kmp_unnamed_critical_addr,4
#endif /* KMP_ARCH_ARM */
+#if KMP_ARCH_PPC64
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ .align 8
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .8byte .gomp_critical_user_
+ .size __kmp_unnamed_critical_addr,8
+#endif /* KMP_ARCH_PPC64 */
#if defined(__linux__)
.section .note.GNU-stack,"",@progbits
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 7633f990a73..348f5d88fee 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -32,7 +32,7 @@
#include <sys/resource.h>
#include <sys/syscall.h>
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK
# include <sys/sysinfo.h>
# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// We should really include <futex.h>, but that causes compatibility problems on different
@@ -61,7 +61,7 @@
#include <fcntl.h>
// For non-x86 architecture
-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
# include <stdbool.h>
# include <ffi.h>
#endif
@@ -110,7 +110,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
/*
* Affinity support
@@ -147,6 +147,19 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
# error Wrong code for getaffinity system call.
# endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_PPC64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 222
+# elif __NR_sched_setaffinity != 222
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 223
+# elif __NR_sched_getaffinity != 223
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+
# else
# error Unknown or unsupported architecture
@@ -445,7 +458,7 @@ __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
int
__kmp_futex_determine_capable()
@@ -462,7 +475,7 @@ __kmp_futex_determine_capable()
return retval;
}
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -481,7 +494,7 @@ __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )
old_value = TCR_4( *p );
new_value = old_value | d;
- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_4( *p );
@@ -498,7 +511,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
old_value = TCR_4( *p );
new_value = old_value & d;
- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_4( *p );
@@ -507,7 +520,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
return old_value;
}
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_PPC64
kmp_int64
__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
{
@@ -516,7 +529,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
old_value = TCR_8( *p );
new_value = old_value + d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_8( *p );
@@ -533,7 +546,7 @@ __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
old_value = TCR_8( *p );
new_value = old_value | d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_8( *p );
@@ -549,7 +562,7 @@ __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
old_value = TCR_8( *p );
new_value = old_value & d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_8( *p );
@@ -2527,7 +2540,7 @@ __kmp_get_load_balance( int max )
#endif // USE_LOAD_BALANCE
-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
void *p_argv[] )
@@ -2561,7 +2574,89 @@ int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
return 1;
}
-#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
+
+#if KMP_ARCH_PPC64
+
+// We really only need the case with 1 argument, because Clang always builds
+// a struct of pointers to shared variables referenced in the outlined function.
+int
+__kmp_invoke_microtask( microtask_t pkfn,
+ int gtid, int tid,
+ int argc, void *p_argv[] ) {
+ switch (argc) {
+ default:
+ fprintf(stderr, "Too many args to microtask: %d!\n", argc);
+ fflush(stderr);
+ exit(-1);
+ case 0:
+ (*pkfn)(&gtid, &tid);
+ break;
+ case 1:
+ (*pkfn)(&gtid, &tid, p_argv[0]);
+ break;
+ case 2:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
+ break;
+ case 3:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
+ break;
+ case 4:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
+ break;
+ case 5:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
+ break;
+ case 6:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5]);
+ break;
+ case 7:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6]);
+ break;
+ case 8:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7]);
+ break;
+ case 9:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
+ break;
+ case 10:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
+ break;
+ case 11:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
+ break;
+ case 12:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11]);
+ break;
+ case 13:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12]);
+ break;
+ case 14:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12], p_argv[13]);
+ break;
+ case 15:
+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
+ p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
+ break;
+ }
+
+ return 1;
+}
+
+#endif
// end of file //
OpenPOWER on IntegriCloud