3 files changed, 51 insertions, 31 deletions
diff --git a/openmp/CREDITS.txt b/openmp/CREDITS.txt
index 4556ddeaed7..a8ab67ab18e 100644
--- a/openmp/CREDITS.txt
+++ b/openmp/CREDITS.txt
@@ -51,3 +51,7 @@ D: Making build work for FreeBSD.
 
 N: Cheng Wang
 D: Contributor to testsuite from OpenUH
+
+N: Diego Caballero
+E: diego.l.caballero@gmail.com
+D: Fork performance improvements
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 48602168f6f..bac516c9e05 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1951,6 +1951,9 @@ typedef struct kmp_local {
 
 } kmp_local_t;
 
+#define KMP_CHECK_UPDATE(a, b) do { if ((a) != (b)) (a) = (b); } while (0)  // store only when the value changes; both args are expanded twice
+#define KMP_CHECK_UPDATE_SYNC(a, b) do { if ((a) != (b)) TCW_SYNC_PTR((a), (b)); } while (0)  // same check, store done via TCW_SYNC_PTR
+
 #define get__blocktime( xteam, xtid )     ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
 #define get__bt_set( xteam, xtid )        ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
 #define get__bt_intervals( xteam, xtid )  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
@@ -2196,7 +2199,7 @@ struct kmp_taskdata {                                 /* aligned during dynamic
     kmp_uint32              td_taskwait_counter;
     kmp_int32               td_taskwait_thread;       /* gtid + 1 of thread encountered taskwait */
     KMP_ALIGN_CACHE kmp_internal_control_t  td_icvs;  /* Internal control variables for the task */
-    volatile kmp_uint32     td_allocated_child_tasks;  /* Child tasks (+ current task) not yet deallocated */
+    KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks;  /* Child tasks (+ current task) not yet deallocated */
     volatile kmp_uint32     td_incomplete_child_tasks; /* Child tasks not yet complete */
 #if OMP_40_ENABLED
     kmp_taskgroup_t *       td_taskgroup;         // Each task keeps pointer to its current taskgroup
@@ -2515,12 +2518,14 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
     void                    *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
 
     KMP_ALIGN_CACHE kmp_info_t **t_threads;
-    int                      t_max_argc;
+    kmp_taskdata_t *t_implicit_task_taskdata;  // Taskdata for the thread's implicit task
+    int                      t_level;          // nested parallel level
+
+    KMP_ALIGN_CACHE int      t_max_argc;
     int                      t_max_nproc;    // maximum threads this team can handle (dynamicly expandable)
     int                      t_serialized;   // levels deep of serialized teams
     dispatch_shared_info_t  *t_disp_buffer;  // buffers for dispatch system
     int                      t_id;           // team's id, assigned by debugger.
-    int                      t_level;        // nested parallel level
     int                      t_active_level; // nested active parallel level
     kmp_r_sched_t            t_sched;        // run-time schedule for the team
 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
@@ -2536,8 +2541,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
     // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding.
     char dummy_padding[1024];
 #endif
-    KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata;  // Taskdata for the thread's implicit task
-    kmp_internal_control_t  *t_control_stack_top;  // internal control stack for additional nested teams.
+    KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;  // internal control stack for additional nested teams.
                                                    // for SERIALIZED teams nested 2 or more levels deep
 #if OMP_40_ENABLED
     kmp_int32                t_cancel_request; // typed flag to store request state of cancellation
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 6b0115f4ee6..56fa1b01542 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -2003,32 +2003,38 @@ __kmp_fork_call(
     KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );
 
     /* setup the new team */
-    team->t.t_master_tid = master_tid;
-    team->t.t_master_this_cons = master_this_cons;
-    team->t.t_ident      = loc;
-    team->t.t_parent     = parent_team;
-    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
+    KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
+    KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
+    KMP_CHECK_UPDATE(team->t.t_ident, loc);
+    KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
+    KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
 #if OMPT_SUPPORT
-    TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
+    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);
 #endif
-    team->t.t_invoke     = invoker;  /* TODO move this to root, maybe */
+    KMP_CHECK_UPDATE(team->t.t_invoke, invoker);  /* TODO move this to root, maybe */
     // TODO: parent_team->t.t_level == INT_MAX ???
 #if OMP_40_ENABLED
     if ( !master_th->th.th_teams_microtask || level > teams_level ) {
 #endif /* OMP_40_ENABLED */
-        team->t.t_level        = parent_team->t.t_level + 1;
-        team->t.t_active_level = parent_team->t.t_active_level + 1;
+        int new_level = parent_team->t.t_level + 1;
+        KMP_CHECK_UPDATE(team->t.t_level, new_level);
+        new_level = parent_team->t.t_active_level + 1;
+        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
 #if OMP_40_ENABLED
     } else {
         // AC: Do not increase parallel level at start of the teams construct
-        team->t.t_level        = parent_team->t.t_level;
-        team->t.t_active_level = parent_team->t.t_active_level;
+        int new_level = parent_team->t.t_level;
+        KMP_CHECK_UPDATE(team->t.t_level, new_level);
+        new_level = parent_team->t.t_active_level;
+        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
     }
 #endif /* OMP_40_ENABLED */
-    team->t.t_sched      = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule
+    kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); // schedule read from the parent team for master_tid
+    if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) // compare team's current value against the new one
+        team->t.t_sched = new_sched; // set master's schedule as new run-time schedule
 
 #if OMP_40_ENABLED
-    team->t.t_cancel_request = cancel_noreq;
+    KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
 #endif
 
     // Update the floating point rounding in the team if required.
@@ -2095,23 +2101,27 @@ __kmp_fork_call(
 #if OMP_40_ENABLED
     if ( ap ) {
 #endif /* OMP_40_ENABLED */
-        for ( i=argc-1; i >= 0; --i )
+        for ( i=argc-1; i >= 0; --i ) {
 // TODO: revert workaround for Intel(R) 64 tracker #96
 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
-            *argv++ = va_arg( *ap, void * );
+            void *new_argv = va_arg(*ap, void *);
 #else
-            *argv++ = va_arg( ap, void * );
+            void *new_argv = va_arg(ap, void *);
 #endif
+            KMP_CHECK_UPDATE(*argv, new_argv);
+            argv++;
+        }
 #if OMP_40_ENABLED
     } else {
-        for ( i=0; i < argc; ++i )
+        for ( i=0; i < argc; ++i ) {
             // Get args from parent team for teams construct
-            argv[i] = team->t.t_parent->t.t_argv[i];
+            KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
+        }
     }
 #endif /* OMP_40_ENABLED */
 
     /* now actually fork the threads */
-    team->t.t_master_active = master_active;
+    KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
     if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
         root->r.r_active = TRUE;
 
@@ -4320,9 +4330,9 @@ __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ide
                     team->t.t_threads[0], team ) );
     KMP_DEBUG_ASSERT( team && new_icvs);
     KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
-    team->t.t_ident = loc;
+    KMP_CHECK_UPDATE(team->t.t_ident, loc);
 
-    team->t.t_id = KMP_GEN_TEAM_ID();
+    { int new_id = KMP_GEN_TEAM_ID(); KMP_CHECK_UPDATE(team->t.t_id, new_id); }  // KMP_CHECK_UPDATE expands its 2nd argument twice: evaluate the id generator exactly once
 
     // Copy ICVs to the master thread's implicit taskdata
     __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
@@ -4774,11 +4784,13 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
             if ( team->t.t_size_changed == -1 ) {
                 team->t.t_size_changed = 1;
             } else {
-                team->t.t_size_changed = 0;
+                KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
             }
 
             // TODO???: team->t.t_max_active_levels = new_max_active_levels;
-            team->t.t_sched =  new_icvs->sched;
+            kmp_r_sched_t new_sched = new_icvs->sched; // schedule carried by the incoming ICVs
+            if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) // compare team's current value against the new one
+                team->t.t_sched = new_sched; // adopt the schedule from new_icvs only when it differs
 
             __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
 
@@ -4795,7 +4807,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
                   team->t.t_last_place ) );
             }
             else {
-                team->t.t_proc_bind = new_proc_bind;
+                KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
                 __kmp_partition_places( team );
             }
 # else
@@ -5016,7 +5028,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
 
         /* reallocate space for arguments if necessary */
         __kmp_alloc_argv_entries( argc, team, TRUE );
-        team->t.t_argc     = argc;
+        KMP_CHECK_UPDATE(team->t.t_argc, argc);
         //
         // The hot team re-uses the previous task team,
         // if untouched during the previous release->gather phase.
@@ -5059,7 +5071,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
 
             /* reallocate space for arguments if necessary */
             __kmp_alloc_argv_entries( argc, team, TRUE );
-            team->t.t_argc     = argc;
+            KMP_CHECK_UPDATE(team->t.t_argc, argc);
 
             KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
                             team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));