summary refs log tree commit diff stats
path: root/openmp/runtime/src
diff options
context:
space:
mode:
author Andrey Churbanov <Andrey.Churbanov@intel.com> 2015-08-18 10:08:27 +0000
committer Andrey Churbanov <Andrey.Churbanov@intel.com> 2015-08-18 10:08:27 +0000
commit 92effc4565fa5bdc332c9d11c9aac9e96f2a2e85 (patch)
tree 68b01e677b03e4b4458e4a6de5b6a1f2133c6623 /openmp/runtime/src
parent f1ae367a990c169e21da1e2904684f43617216f5 (diff)
download bcm5719-llvm-92effc4565fa5bdc332c9d11c9aac9e96f2a2e85.tar.gz
bcm5719-llvm-92effc4565fa5bdc332c9d11c9aac9e96f2a2e85.zip
D11990: Lock-free start of serialized parallel regions
llvm-svn: 245286
Diffstat (limited to 'openmp/runtime/src')
-rw-r--r-- openmp/runtime/src/kmp_runtime.c 55
1 file changed, 30 insertions, 25 deletions
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 6b2a5c69547..64ca8f2f277 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -875,24 +875,6 @@ __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
KMP_DEBUG_ASSERT( root && parent_team );
//
- // Initial check to see if we should use a serialized team.
- //
- if ( set_nthreads == 1 ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
- __kmp_get_gtid(), set_nthreads ));
- return 1;
- }
- if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
-#if OMP_40_ENABLED
- && !enter_teams
-#endif /* OMP_40_ENABLED */
- ) ) || ( __kmp_library == library_serial ) ) {
- KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
- __kmp_get_gtid(), set_nthreads ));
- return 1;
- }
-
- //
// If dyn-var is set, dynamically adjust the number of desired threads,
// according to the method specified by dynamic_mode.
//
@@ -1720,23 +1702,47 @@ __kmp_fork_call(
}
#endif
- /* determine how many new threads we can use */
- __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
-
if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
nthreads = 1;
} else {
+#if OMP_40_ENABLED
+ int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));
+#endif
nthreads = master_set_numthreads ?
master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task
- nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
+
+ // Check whether the forkjoin lock needs to be taken at all (it is not needed for a serialized parallel region outside of a teams construct).
+ // This check was moved here from __kmp_reserve_threads() to speed up nested serialized parallel regions.
+ if (nthreads > 1) {
+ if ( ( !get__nested(master_th) && (root->r.r_in_parallel
+#if OMP_40_ENABLED
+ && !enter_teams
+#endif /* OMP_40_ENABLED */
+ ) ) || ( __kmp_library == library_serial ) ) {
+ KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",
+ gtid, nthreads ));
+ nthreads = 1;
+ }
+ }
+ if ( nthreads > 1 ) {
+ /* determine how many new threads we can use */
+ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
+
+ nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
/* AC: If we execute teams from parallel region (on host), then teams should be created
but each can only have 1 thread if nesting is disabled. If teams called from serial region,
then teams and their threads should be created regardless of the nesting setting. */
- , ((ap==NULL && active_level==0) ||
- (ap && teams_level>0 && teams_level==level))
+ , enter_teams
#endif /* OMP_40_ENABLED */
);
+ if ( nthreads == 1 ) {
+ // Release the lock here for single-thread execution;
+ // for multi-thread execution it is released later,
+ // after the team of threads has been created and initialized.
+ __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
+ }
+ }
}
KMP_DEBUG_ASSERT( nthreads > 0 );
@@ -1753,7 +1759,6 @@ __kmp_fork_call(
void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */
- __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
__kmpc_serialized_parallel(loc, gtid);
OpenPOWER on IntegriCloud