author | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2015-12-11 21:57:06 +0000 |
---|---|---|
committer | Jonathan Peyton <jonathan.l.peyton@intel.com> | 2015-12-11 21:57:06 +0000 |
commit | dae13d81b48f5802137b2857701d0d3adce97b06 (patch) | |
tree | fef5b7a24a415dc378a6be15666e5320ce70452c /openmp/runtime/src/kmp_lock.cpp | |
parent | 4062601cb995e16dfdeae4e2755321dc0d60d2e1 (diff) | |
Hinted lock (OpenMP 4.5 feature) Updates/Fixes Part 2
* Added a new user TSX lock implementation, RTM. This implementation is a
lightweight version of the adaptive lock implementation, omitting the
back-off logic for deciding when to speculate (or not). The fall-back lock is
still the queuing lock. (A condensed sketch of the acquire/release pattern is
shown after this list.)
* Changed indirect lock table management. The data for indirect lock management
is now encapsulated in the "kmp_indirect_lock_table_t" type. Also, the lock
table was changed from a linear array to a 2D table, and each entry is now a
kmp_indirect_lock_t object (previously a pointer to an object). (An
illustrative model of the 2D indexing follows the commit trailers below.)
* Some cleanup in the critical section code
* Removed the limits on the tuning parameters read from KMP_ADAPTIVE_LOCK_PROPS
* KMP_USE_DYNAMIC_LOCK=1 also turns on these two switches: KMP_USE_TSX and
KMP_USE_ADAPTIVE_LOCKS
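
The new RTM lock follows the usual TSX lock-elision pattern: run the critical section as a hardware transaction while the fall-back lock looks free, and only take the real lock after a few failed speculation attempts. Below is a minimal stand-alone sketch of that pattern, assuming an RTM-capable CPU and a compiler flag such as -mrtm; the std::atomic flag and the fallback_acquire/fallback_release helpers are illustrative stand-ins for the kmp_queuing_lock_t fall-back used by the actual patch.

```cpp
#include <immintrin.h>
#include <atomic>

// Stand-in for the queuing fall-back lock (0 = unlocked).
static std::atomic<int> lock_word(0);

static void fallback_acquire() { while (lock_word.exchange(1)) { /* spin */ } }
static void fallback_release() { lock_word.store(0); }

void rtm_acquire()
{
    unsigned retries = 3;
    do {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            if (lock_word.load() == 0)
                return;                          // speculating: lock looks free, proceed
            _xabort(0xff);                       // lock is held, abort with a known code
        }
        if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
            while (lock_word.load() != 0) { }    // wait for the holder, then retry speculation
        } else if (!(status & _XABORT_RETRY)) {
            break;                               // the abort reason says retrying is pointless
        }
    } while (retries--);
    fallback_acquire();                          // give up on speculation: take the real lock
}

void rtm_release()
{
    if (lock_word.load() == 0)
        _xend();                                 // still speculating: commit the transaction
    else
        fallback_release();                      // we actually hold the fall-back lock
}
```

This mirrors the __kmp_acquire_rtm_lock/__kmp_release_rtm_lock functions added in the diff below, minus the runtime's consistency checks and __kmp_yield-based waiting.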
Differential Revision: http://reviews.llvm.org/D15204
llvm-svn: 255375
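
The practical benefit of the new 2D table is pointer stability: a kmp_indirect_lock_t never moves once allocated, because growing the table only reallocates the small array of row (block) pointers. Here is a rough, self-contained model of that scheme under assumed names (lock_t, CHUNK, get_lock, grow); the patch's counterparts are kmp_indirect_lock_t, KMP_I_LOCK_CHUNK, KMP_GET_I_LOCK, and the growth code inside __kmp_allocate_indirect_lock.

```cpp
#include <cstdlib>
#include <cstring>

struct lock_t { void *impl; };          // stand-in for kmp_indirect_lock_t

enum { CHUNK = 1024 };                  // plays the role of KMP_I_LOCK_CHUNK

struct lock_table_t {
    lock_t **table;                     // row pointers, one per block of CHUNK locks
    unsigned size;                      // total capacity (a multiple of CHUNK)
    unsigned next;                      // next unused index
};

// Equivalent of KMP_GET_I_LOCK(idx): row = idx / CHUNK, column = idx % CHUNK.
static lock_t *get_lock(lock_table_t *t, unsigned idx)
{
    return t->table[idx / CHUNK] + idx % CHUNK;
}

// Doubling copies only the row pointers; the blocks that hold the lock objects
// stay where they are, so previously handed-out lock_t* remain valid.
static void grow(lock_table_t *t)
{
    unsigned rows = t->size / CHUNK;
    lock_t **old_table = t->table;
    t->table = (lock_t **)std::malloc(2 * rows * sizeof(lock_t *));
    std::memcpy(t->table, old_table, rows * sizeof(lock_t *));
    std::free(old_table);
    for (unsigned i = rows; i < 2 * rows; ++i)
        t->table[i] = (lock_t *)std::calloc(CHUNK, sizeof(lock_t));
    t->size *= 2;
}
```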
Diffstat (limited to 'openmp/runtime/src/kmp_lock.cpp')
-rw-r--r-- | openmp/runtime/src/kmp_lock.cpp | 297 |
1 file changed, 199 insertions, 98 deletions
```diff
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 43d751ea3bc..65f4867c6e6 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3014,11 +3014,13 @@ __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
 # ifndef __OMP_H
 typedef enum kmp_lock_hint_t {
     kmp_lock_hint_none = 0,
-    kmp_lock_hint_contended,
     kmp_lock_hint_uncontended,
+    kmp_lock_hint_contended,
     kmp_lock_hint_nonspeculative,
     kmp_lock_hint_speculative,
-    kmp_lock_hint_adaptive,
+    kmp_lock_hint_hle,
+    kmp_lock_hint_rtm,
+    kmp_lock_hint_adaptive
 } kmp_lock_hint_t;
 # endif
 
@@ -3029,7 +3031,7 @@ static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
     KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
 }
 
-#if KMP_HAS_HLE
+#if KMP_USE_TSX
 
 // HLE lock functions - imported from the testbed runtime.
 #define HLE_ACQUIRE ".byte 0xf2;"
@@ -3101,9 +3103,93 @@ __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
 }
 
-#endif // KMP_HAS_HLE
+static void
+__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
+{
+    __kmp_init_queuing_lock(lck);
+}
+
+static void
+__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
+{
+    __kmp_destroy_queuing_lock(lck);
+}
+
+static void
+__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    unsigned retries=3, status;
+    do {
+        status = _xbegin();
+        if (status == _XBEGIN_STARTED) {
+            if (__kmp_is_unlocked_queuing_lock(lck))
+                return;
+            _xabort(0xff);
+        }
+        if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
+            // Wait until lock becomes free
+            while (! __kmp_is_unlocked_queuing_lock(lck))
+                __kmp_yield(TRUE);
+        }
+        else if (!(status & _XABORT_RETRY))
+            break;
+    } while (retries--);
+
+    // Fall-back non-speculative lock (xchg)
+    __kmp_acquire_queuing_lock(lck, gtid);
+}
+
+static void
+__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    __kmp_acquire_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    if (__kmp_is_unlocked_queuing_lock(lck)) {
+        // Releasing from speculation
+        _xend();
+    }
+    else {
+        // Releasing from a real lock
+        __kmp_release_queuing_lock(lck, gtid);
+    }
+    return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    return __kmp_release_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    unsigned retries=3, status;
+    do {
+        status = _xbegin();
+        if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
+            return 1;
+        }
+        if (!(status & _XABORT_RETRY))
+            break;
+    } while (retries--);
 
-// Entry functions for indirect locks (first element of direct_*_ops[]).
+    return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
+}
+
+static int
+__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+    return __kmp_test_rtm_lock(lck, gtid);
+}
+
+#endif // KMP_USE_TSX
+
+// Entry functions for indirect locks (first element of direct lock jump tables).
 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
@@ -3191,24 +3277,10 @@ int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
 
 // Lock index table.
-kmp_indirect_lock_t **__kmp_indirect_lock_table;
-kmp_lock_index_t __kmp_indirect_lock_table_size;
-kmp_lock_index_t __kmp_indirect_lock_table_next;
+kmp_indirect_lock_table_t __kmp_i_lock_table;
 
 // Size of indirect locks.
-static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {
-    sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
-#if KMP_USE_ADAPTIVE_LOCKS
-    sizeof(kmp_adaptive_lock_t),
-#endif
-    sizeof(kmp_drdpa_lock_t),
-    sizeof(kmp_tas_lock_t),
-#if KMP_HAS_FUTEX
-    sizeof(kmp_futex_lock_t),
-#endif
-    sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
-    sizeof(kmp_drdpa_lock_t)
-};
+static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
 
 // Jump tables for lock accessor/modifier.
 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
@@ -3219,28 +3291,10 @@ kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) =
 // Use different lock pools for different lock types.
 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
 
-// Inserts the given lock ptr to the lock table.
-kmp_lock_index_t
-__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
-{
-    kmp_lock_index_t next = __kmp_indirect_lock_table_next;
-    // Check capacity and double the size if required
-    if (next >= __kmp_indirect_lock_table_size) {
-        kmp_lock_index_t i;
-        kmp_lock_index_t size = __kmp_indirect_lock_table_size;
-        kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
-        __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
-        KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
-        __kmp_free(old_table);
-        __kmp_indirect_lock_table_size = 2*next;
-    }
-    // Insert lck to the table and return the index.
-    __kmp_indirect_lock_table[next] = lck;
-    __kmp_indirect_lock_table_next++;
-    return next;
-}
-
-// User lock allocator for dynamically dispatched locks.
+// User lock allocator for dynamically dispatched indirect locks.
+// Every entry of the indirect lock table holds the address and type of the allocated indrect lock
+// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
+// object is returned to the reusable pool of locks, unique to each lock type.
 kmp_indirect_lock_t *
 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
 {
@@ -3250,15 +3304,33 @@ __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_lock
     __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
     if (__kmp_indirect_lock_pool[tag] != NULL) {
+        // Reuse the allocated and destroyed lock object
         lck = __kmp_indirect_lock_pool[tag];
         if (OMP_LOCK_T_SIZE < sizeof(void *))
            idx = lck->lock->pool.index;
        __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
+        KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
     } else {
-        lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
+        idx = __kmp_i_lock_table.next;
+        // Check capacity and double the size if it is full
+        if (idx == __kmp_i_lock_table.size) {
+            // Double up the space for block pointers
+            int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
+            kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
+            __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
+            KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
+            __kmp_free(old_table);
+            // Allocate new objects in the new blocks
+            for (int i = row; i < 2*row; ++i)
+                *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
+                    __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+            __kmp_i_lock_table.size = 2*idx;
+        }
+        __kmp_i_lock_table.next++;
+        lck = KMP_GET_I_LOCK(idx);
+        // Allocate a new base lock object
         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
-        if (OMP_LOCK_T_SIZE < sizeof(void *))
-            idx = __kmp_insert_indirect_lock(lck);
+        KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
     }
 
     __kmp_release_lock(&__kmp_global_lock, gtid);
@@ -3286,10 +3358,10 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func)
         }
         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
             kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
-            if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
+            if (idx < 0 || idx >= __kmp_i_lock_table.size) {
                 KMP_FATAL(LockIsUninitialized, func);
             }
-            lck = __kmp_indirect_lock_table[idx];
+            lck = KMP_GET_I_LOCK(idx);
         } else {
             lck = *((kmp_indirect_lock_t **)user_lock);
         }
@@ -3299,7 +3371,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func)
         return lck;
     } else {
         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-            return __kmp_indirect_lock_table[KMP_EXTRACT_I_INDEX(user_lock)];
+            return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
        } else {
             return *((kmp_indirect_lock_t **)user_lock);
         }
@@ -3315,10 +3387,15 @@ __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
         seq = lockseq_queuing;
     }
 #endif
+#if KMP_USE_TSX
+    if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
+        seq = lockseq_queuing;
+    }
+#endif
     kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
     KMP_I_LOCK_FUNC(l, init)(l->lock);
-    KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
+    KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
 }
 
 static void
@@ -3395,7 +3472,7 @@ __kmp_init_lock_hinted(void **lock, int hint)
         seq = lockseq_tas;
         break;
     case kmp_lock_hint_speculative:
-#if KMP_HAS_HLE
+#if KMP_USE_TSX
         seq = lockseq_hle;
 #else
         seq = lockseq_tas;
@@ -3408,6 +3485,14 @@ __kmp_init_lock_hinted(void **lock, int hint)
         seq = lockseq_queuing;
 #endif
         break;
+#if KMP_USE_TSX
+    case kmp_lock_hint_hle:
+        seq = lockseq_hle;
+        break;
+    case kmp_lock_hint_rtm:
+        seq = lockseq_rtm;
+        break;
+#endif
     // Defaults to queuing locks.
     case kmp_lock_hint_contended:
     case kmp_lock_hint_nonspeculative:
@@ -3474,7 +3559,6 @@ __kmp_init_nest_lock_hinted(void **lock, int hint)
     case kmp_lock_hint_nonspeculative:
     default:
         seq = lockseq_nested_queuing;
-        break;
     }
     KMP_INIT_I_LOCK(lock, seq);
 #if USE_ITT_BUILD
@@ -3483,27 +3567,6 @@ __kmp_init_nest_lock_hinted(void **lock, int hint)
 #endif
 }
 
-#if KMP_USE_ADAPTIVE_LOCKS
-# define init_lock_func(table, expand) {            \
-    table[locktag_ticket]         = expand(ticket); \
-    table[locktag_queuing]        = expand(queuing); \
-    table[locktag_adaptive]       = expand(queuing); \
-    table[locktag_drdpa]          = expand(drdpa); \
-    table[locktag_nested_ticket]  = expand(ticket); \
-    table[locktag_nested_queuing] = expand(queuing); \
-    table[locktag_nested_drdpa]   = expand(drdpa); \
-}
-#else
-# define init_lock_func(table, expand) {            \
-    table[locktag_ticket]         = expand(ticket); \
-    table[locktag_queuing]        = expand(queuing); \
-    table[locktag_drdpa]          = expand(drdpa); \
-    table[locktag_nested_ticket]  = expand(ticket); \
-    table[locktag_nested_queuing] = expand(queuing); \
-    table[locktag_nested_drdpa]   = expand(drdpa); \
-}
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
 // Initializes data for dynamic user locks.
 void
 __kmp_init_dynamic_user_locks()
@@ -3527,24 +3590,62 @@ __kmp_init_dynamic_user_locks()
     }
 
     // Initialize lock index table
-    __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
-    __kmp_indirect_lock_table_size = 1024;
-    __kmp_indirect_lock_table_next = 0;
+    __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
+    __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+    *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
+        __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+    __kmp_i_lock_table.next = 0;
+
+    // Indirect lock size
+    __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
+    __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
+#if KMP_USE_ADAPTIVE_LOCKS
+    __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
+#endif
+    __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
+#if KMP_USE_TSX
+    __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
+#endif
+    __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
+#if KMP_USE_FUTEX
+    __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
+#endif
+    __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
+    __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
+    __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
 
     // Initialize lock accessor/modifier
-    // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
-#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
-    init_lock_func(__kmp_indirect_set_location, expand_func);
-#undef expand_func
-#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
-    init_lock_func(__kmp_indirect_set_flags, expand_func);
-#undef expand_func
-#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
-    init_lock_func(__kmp_indirect_get_location, expand_func);
-#undef expand_func
-#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
-    init_lock_func(__kmp_indirect_get_flags, expand_func);
-#undef expand_func
+#define fill_jumps(table, expand, sep) {            \
+    table[locktag##sep##ticket]  = expand(ticket);  \
+    table[locktag##sep##queuing] = expand(queuing); \
+    table[locktag##sep##drdpa]   = expand(drdpa);   \
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+# define fill_table(table, expand) {           \
+    fill_jumps(table, expand, _);              \
+    table[locktag_adaptive] = expand(queuing); \
+    fill_jumps(table, expand, _nested_);       \
+}
+#else
+# define fill_table(table, expand) {           \
+    fill_jumps(table, expand, _);              \
+    fill_jumps(table, expand, _nested_);       \
+}
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
+    fill_table(__kmp_indirect_set_location, expand);
+#undef expand
+#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
+    fill_table(__kmp_indirect_set_flags, expand);
+#undef expand
+#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
+    fill_table(__kmp_indirect_get_location, expand);
+#undef expand
+#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
+    fill_table(__kmp_indirect_get_flags, expand);
+#undef expand
 
     __kmp_init_user_locks = TRUE;
 }
@@ -3562,25 +3663,25 @@ __kmp_cleanup_indirect_user_locks()
         while (l != NULL) {
             kmp_indirect_lock_t *ll = l;
             l = (kmp_indirect_lock_t *)l->lock->pool.next;
-            if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-                __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
-            }
+            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
             __kmp_free(ll->lock);
-            __kmp_free(ll);
+            ll->lock = NULL;
         }
     }
     // Clean up the remaining undestroyed locks.
-    for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
-        kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
-        if (l != NULL) {
+    for (i = 0; i < __kmp_i_lock_table.next; i++) {
+        kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
+        if (l->lock != NULL) {
             // Locks not destroyed explicitly need to be destroyed here.
             KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
             __kmp_free(l->lock);
-            __kmp_free(l);
        }
     }
     // Free the table
-    __kmp_free(__kmp_indirect_lock_table);
+    for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
+        __kmp_free(__kmp_i_lock_table.table[i]);
+    __kmp_free(__kmp_i_lock_table.table);
 
     __kmp_init_user_locks = FALSE;
 }
```