diff options
| author | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2017-07-19 09:26:13 +0000 |
|---|---|---|
| committer | Andrey Churbanov <Andrey.Churbanov@intel.com> | 2017-07-19 09:26:13 +0000 |
| commit | c7476ed0be5deb000528e7b540ddb849c95be28c (patch) | |
| tree | 0fce2d950c00e19fffca48ce357d72bbf9f56b7a /openmp/runtime/src | |
| parent | b05a55787a61e063f868fdb55c4f6fb0ee8bd618 (diff) | |
| download | bcm5719-llvm-c7476ed0be5deb000528e7b540ddb849c95be28c.tar.gz bcm5719-llvm-c7476ed0be5deb000528e7b540ddb849c95be28c.zip | |
OpenMP RTL cleanup: two PAUSEs per spin loop iteration replaced with a single one
Differential Revision: https://reviews.llvm.org/D35490
llvm-svn: 308423
Diffstat (limited to 'openmp/runtime/src')
| -rw-r--r-- | openmp/runtime/src/kmp.h | 10 | ||||
| -rw-r--r-- | openmp/runtime/src/kmp_wait_release.h | 9 |
2 files changed, 13 insertions, 6 deletions
```diff
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index e7ccc3031fb..40d65b8c651 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1040,7 +1040,11 @@ extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
 #if KMP_ARCH_X86
 extern void __kmp_x86_pause(void);
 #elif KMP_MIC
-static void __kmp_x86_pause(void) { _mm_delay_32(100); }
+// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
+// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
+// the delay from 100 to 300 showed even better performance than double PAUSE
+// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
+static void __kmp_x86_pause(void) { _mm_delay_32(300); }
 #else
 static void __kmp_x86_pause(void) { _mm_pause(); }
 #endif
@@ -1076,7 +1080,7 @@ static void __kmp_x86_pause(void) { _mm_pause(); }
     KMP_CPU_PAUSE(); \
     (count) -= 2; \
     if (!(count)) { \
-      KMP_YIELD(cond); \
+      __kmp_yield(cond); \
       (count) = __kmp_yield_next; \
     } \
   }
@@ -1085,7 +1089,7 @@ static void __kmp_x86_pause(void) { _mm_pause(); }
     KMP_CPU_PAUSE(); \
     (count) -= 2; \
     if (!(count)) { \
-      KMP_YIELD(1); \
+      __kmp_yield(1); \
       (count) = __kmp_yield_next; \
     } \
   }
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index e223dc0c9b9..3aac2027b01 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -47,7 +47,7 @@ enum flag_type {
  */
 template <typename P> class kmp_flag {
   volatile P
-      *loc; /**< Pointer to the flag storage that is modified by another thread
+      *loc; /**< Pointer to the flag storage that is modified by another thread
              */
   flag_type t; /**< "Type" of the flag in loc */
 public:
@@ -225,11 +225,14 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
     // If we are oversubscribed, or have waited a bit (and
     // KMP_LIBRARY=throughput), then yield
-    KMP_YIELD(oversubscribed);
     // TODO: Should it be number of cores instead of thread contexts? Like:
     // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
     // Need performance improvement data to make the change...
-    KMP_YIELD_SPIN(spins);
+    if (oversubscribed) {
+      KMP_YIELD(1);
+    } else {
+      KMP_YIELD_SPIN(spins);
+    }
     // Check if this thread was transferred from a team
     // to the thread pool (or vice-versa) while spinning.
     in_pool = !!TCR_4(this_thr->th.th_in_pool);
```

