diff options
author | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2019-06-05 16:14:47 +0000 |
---|---|---|
committer | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2019-06-05 16:14:47 +0000 |
commit | d47f5488cf02fa06259a0f8563f684e2d45165c9 (patch) | |
tree | 456b83d445bfae2315d234f4e856998a235930d2 | |
parent | de586bd1fd57f3d4438fa9fdfcc7406727a90004 (diff) | |
download | bcm5719-llvm-d47f5488cf02fa06259a0f8563f684e2d45165c9.tar.gz bcm5719-llvm-d47f5488cf02fa06259a0f8563f684e2d45165c9.zip |
Added propagation of the master thread's initial stack size to worker threads, provided that size is not too big.
Currently implemented only for non-Windows 64-bit platforms.
Differential Revision: https://reviews.llvm.org/D62488
llvm-svn: 362618
-rw-r--r-- | openmp/runtime/src/kmp.h | 1 | ||||
-rw-r--r-- | openmp/runtime/src/kmp_settings.cpp | 14 | ||||
-rw-r--r-- | openmp/runtime/src/z_Linux_util.cpp | 11 | ||||
-rw-r--r-- | openmp/runtime/test/misc_bugs/stack-propagate.c | 65 |
4 files changed, 91 insertions, 0 deletions
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 0133108b7e1..f1a8f3814ee 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3263,6 +3263,7 @@ extern void __kmp_init_random(kmp_info_t *thread); extern kmp_r_sched_t __kmp_get_schedule_global(void); extern void __kmp_adjust_num_threads(int new_nproc); +extern void __kmp_check_stksize(size_t *val); extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL); extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL); diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 114cd9c9bac..1afba5bb575 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -289,6 +289,20 @@ static void __kmp_stg_parse_bool(char const *name, char const *value, } } // __kmp_stg_parse_bool +// placed here in order to use __kmp_round4k static function +void __kmp_check_stksize(size_t *val) { + // if system stack size is too big then limit the size for worker threads + if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics... + *val = KMP_DEFAULT_STKSIZE * 16; + if (*val < KMP_MIN_STKSIZE) + *val = KMP_MIN_STKSIZE; + if (*val > KMP_MAX_STKSIZE) + *val = KMP_MAX_STKSIZE; // dead code currently, but may work in future +#if KMP_OS_DARWIN + *val = __kmp_round4k(*val); +#endif // KMP_OS_DARWIN +} + static void __kmp_stg_parse_size(char const *name, char const *value, size_t size_min, size_t size_max, int *is_specified, size_t *out, diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 7eb782a33a3..b1cf8299f1b 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1833,6 +1833,17 @@ void __kmp_runtime_initialize(void) { __kmp_xproc = __kmp_get_xproc(); +#if ! 
KMP_32_BIT_ARCH + struct rlimit rlim; + // read stack size of calling thread, save it as default for worker threads; + // this should be done before reading environment variables + status = getrlimit(RLIMIT_STACK, &rlim); + if (status == 0) { // success? + __kmp_stksize = rlim.rlim_cur; + __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed + } +#endif /* KMP_32_BIT_ARCH */ + if (sysconf(_SC_THREADS)) { /* Query the maximum number of threads */ diff --git a/openmp/runtime/test/misc_bugs/stack-propagate.c b/openmp/runtime/test/misc_bugs/stack-propagate.c new file mode 100644 index 00000000000..ac289b56ccd --- /dev/null +++ b/openmp/runtime/test/misc_bugs/stack-propagate.c @@ -0,0 +1,65 @@ +// RUN: %libomp-compile-and-run + +// https://bugs.llvm.org/show_bug.cgi?id=26540 requested +// stack size to be propagated from master to workers. +// Library implements propagation of not too big stack +// for Linux x86_64 platform (skipped Windows for now). +// +// The test checks that workers can use more than 4MB +// of stack (4MB - was historical default for +// stack size of worker thread in runtime library). 
+ +#include <stdio.h> +#include <omp.h> +#if !defined(_WIN32) +#include <sys/resource.h> // getrlimit +#endif + +#define STK 4800000 + +double foo(int n, int th) +{ + double arr[n]; + int i; + double res = 0.0; + for (i = 0; i < n; ++i) { + arr[i] = (double)i / (n + 2); + } + for (i = 0; i < n; ++i) { + res += arr[i] / n; + } + return res; +} + +int main(int argc, char *argv[]) +{ +#if defined(_WIN32) + // don't test Windows + printf("stack propagation not implemented, skipping test...\n"); + return 0; +#else + int status; + double val = 0.0; + int m = STK / 8; // > 4800000 bytes per thread + // read stack size of calling thread, save it as default + struct rlimit rlim; + status = getrlimit(RLIMIT_STACK, &rlim); + if (sizeof(void *) > 4 && // do not test 32-bit systems, + status == 0 && rlim.rlim_cur > STK) { // or small initial stack size +#pragma omp parallel reduction(+:val) + { + val += foo(m, omp_get_thread_num()); + } + } else { + printf("too small stack size limit (needs about 8MB), skipping test...\n"); + return 0; + } + if (val > 0.1) { + printf("passed\n"); + return 0; + } else { + printf("failed, val = %f\n", val); + return 1; + } +#endif // _WIN32 +} |