author     Jonathan Peyton <jonathan.l.peyton@intel.com>  2015-12-11 21:57:06 +0000
committer  Jonathan Peyton <jonathan.l.peyton@intel.com>  2015-12-11 21:57:06 +0000
commit     dae13d81b48f5802137b2857701d0d3adce97b06 (patch)
tree       fef5b7a24a415dc378a6be15666e5320ce70452c /openmp/runtime/src/kmp_lock.cpp
parent     4062601cb995e16dfdeae4e2755321dc0d60d2e1 (diff)
Hinted lock (OpenMP 4.5 feature) Updates/Fixes Part 2
* Added a new user TSX lock implementation, RTM. This implementation is a light-weight version of the adaptive lock implementation, omitting the back-off logic for deciding when to speculate (or not). The fall-back lock is still the queuing lock.
* Changed indirect lock table management. The data for indirect lock management is now encapsulated in the "kmp_indirect_lock_table_t" type. The lock table layout was also changed from linear to 2D, and each entry is now a kmp_indirect_lock_t object (was a pointer to an object).
* Some clean-up in the critical section code.
* Removed the limits on the tuning parameters read from KMP_ADAPTIVE_LOCK_PROPS.
* KMP_USE_DYNAMIC_LOCK=1 also turns on these two switches: KMP_USE_TSX and KMP_USE_ADAPTIVE_LOCKS.

Differential Revision: http://reviews.llvm.org/D15204

llvm-svn: 255375
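To make the new table layout concrete, here is a minimal, self-contained C sketch of the chunked (2D) growth scheme the second bullet describes. The names, the chunk size of 1024, and the helper functions are illustrative assumptions; the real definitions (kmp_indirect_lock_table_t, KMP_I_LOCK_CHUNK, KMP_GET_I_LOCK) live in kmp_lock.h, which is not part of this diff.

/* Illustrative sketch only -- names and CHUNK are assumptions, not the
 * committed kmp_lock.h definitions. */
#include <stdlib.h>
#include <string.h>

#define CHUNK 1024 /* assumed counterpart of KMP_I_LOCK_CHUNK */

typedef struct lock_obj { void *lock; } lock_obj_t; /* stand-in for kmp_indirect_lock_t */

typedef struct lock_table {
    lock_obj_t **table; /* row pointers, one per block of CHUNK objects */
    unsigned size;      /* total capacity, always a multiple of CHUNK */
    unsigned next;      /* index of the next unused entry */
} lock_table_t;

/* Row/column addressing, mirroring what KMP_GET_I_LOCK is expected to do. */
static lock_obj_t *get_lock(lock_table_t *t, unsigned idx) {
    return &t->table[idx / CHUNK][idx % CHUNK];
}

/* Growth doubles only the array of row pointers and appends new blocks;
 * existing blocks stay in place, so entry addresses remain stable as the
 * table grows, and entries are the objects themselves rather than pointers
 * to separately allocated objects, matching the commit's description. */
static lock_obj_t *allocate_lock(lock_table_t *t) {
    if (t->next == t->size) {
        unsigned rows = t->size / CHUNK;
        lock_obj_t **old = t->table;
        t->table = (lock_obj_t **)malloc(2 * rows * sizeof(lock_obj_t *));
        memcpy(t->table, old, rows * sizeof(lock_obj_t *));
        free(old);
        for (unsigned i = rows; i < 2 * rows; ++i)
            t->table[i] = (lock_obj_t *)calloc(CHUNK, sizeof(lock_obj_t));
        t->size *= 2;
    }
    return get_lock(t, t->next++);
}

int main(void) {
    lock_table_t t;
    t.size = CHUNK;
    t.next = 0;
    t.table = (lock_obj_t **)malloc(sizeof(lock_obj_t *));
    t.table[0] = (lock_obj_t *)calloc(CHUNK, sizeof(lock_obj_t));
    for (unsigned i = 0; i < 3000; ++i) /* forces two rounds of doubling */
        (void)allocate_lock(&t);
    return 0;
}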
Diffstat (limited to 'openmp/runtime/src/kmp_lock.cpp')
-rw-r--r--  openmp/runtime/src/kmp_lock.cpp | 297
1 file changed, 199 insertions(+), 98 deletions(-)
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 43d751ea3bc..65f4867c6e6 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3014,11 +3014,13 @@ __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
# ifndef __OMP_H
typedef enum kmp_lock_hint_t {
kmp_lock_hint_none = 0,
- kmp_lock_hint_contended,
kmp_lock_hint_uncontended,
+ kmp_lock_hint_contended,
kmp_lock_hint_nonspeculative,
kmp_lock_hint_speculative,
- kmp_lock_hint_adaptive,
+ kmp_lock_hint_hle,
+ kmp_lock_hint_rtm,
+ kmp_lock_hint_adaptive
} kmp_lock_hint_t;
# endif
@@ -3029,7 +3031,7 @@ static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
}
-#if KMP_HAS_HLE
+#if KMP_USE_TSX
// HLE lock functions - imported from the testbed runtime.
#define HLE_ACQUIRE ".byte 0xf2;"
@@ -3101,9 +3103,93 @@ __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}
-#endif // KMP_HAS_HLE
+static void
+__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
+{
+ __kmp_init_queuing_lock(lck);
+}
+
+static void
+__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
+{
+ __kmp_destroy_queuing_lock(lck);
+}
+
+static void
+__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ unsigned retries=3, status;
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED) {
+ if (__kmp_is_unlocked_queuing_lock(lck))
+ return;
+ _xabort(0xff);
+ }
+ if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
+ // Wait until lock becomes free
+ while (! __kmp_is_unlocked_queuing_lock(lck))
+ __kmp_yield(TRUE);
+ }
+ else if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
+
+ // Fall-back non-speculative lock (xchg)
+ __kmp_acquire_queuing_lock(lck, gtid);
+}
+
+static void
+__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ __kmp_acquire_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ if (__kmp_is_unlocked_queuing_lock(lck)) {
+ // Releasing from speculation
+ _xend();
+ }
+ else {
+ // Releasing from a real lock
+ __kmp_release_queuing_lock(lck, gtid);
+ }
+ return KMP_LOCK_RELEASED;
+}
+
+static int
+__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_release_rtm_lock(lck, gtid);
+}
+
+static int
+__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ unsigned retries=3, status;
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
+ return 1;
+ }
+ if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
-// Entry functions for indirect locks (first element of direct_*_ops[]).
+ return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
+}
+
+static int
+__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
+{
+ return __kmp_test_rtm_lock(lck, gtid);
+}
+
+#endif // KMP_USE_TSX
+
+// Entry functions for indirect locks (first element of direct lock jump tables).
static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
@@ -3191,24 +3277,10 @@ int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
// Lock index table.
-kmp_indirect_lock_t **__kmp_indirect_lock_table;
-kmp_lock_index_t __kmp_indirect_lock_table_size;
-kmp_lock_index_t __kmp_indirect_lock_table_next;
+kmp_indirect_lock_table_t __kmp_i_lock_table;
// Size of indirect locks.
-static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {
- sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
-#if KMP_USE_ADAPTIVE_LOCKS
- sizeof(kmp_adaptive_lock_t),
-#endif
- sizeof(kmp_drdpa_lock_t),
- sizeof(kmp_tas_lock_t),
-#if KMP_HAS_FUTEX
- sizeof(kmp_futex_lock_t),
-#endif
- sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
- sizeof(kmp_drdpa_lock_t)
-};
+static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
// Jump tables for lock accessor/modifier.
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
@@ -3219,28 +3291,10 @@ kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) =
// Use different lock pools for different lock types.
static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
-// Inserts the given lock ptr to the lock table.
-kmp_lock_index_t
-__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
-{
- kmp_lock_index_t next = __kmp_indirect_lock_table_next;
- // Check capacity and double the size if required
- if (next >= __kmp_indirect_lock_table_size) {
- kmp_lock_index_t i;
- kmp_lock_index_t size = __kmp_indirect_lock_table_size;
- kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
- __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
- KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
- __kmp_free(old_table);
- __kmp_indirect_lock_table_size = 2*next;
- }
- // Insert lck to the table and return the index.
- __kmp_indirect_lock_table[next] = lck;
- __kmp_indirect_lock_table_next++;
- return next;
-}
-
-// User lock allocator for dynamically dispatched locks.
+// User lock allocator for dynamically dispatched indirect locks.
+// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
+// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
+// object is returned to the reusable pool of locks, unique to each lock type.
kmp_indirect_lock_t *
__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
@@ -3250,15 +3304,33 @@ __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_lock
__kmp_acquire_lock(&__kmp_global_lock, gtid);
if (__kmp_indirect_lock_pool[tag] != NULL) {
+ // Reuse the allocated and destroyed lock object
lck = __kmp_indirect_lock_pool[tag];
if (OMP_LOCK_T_SIZE < sizeof(void *))
idx = lck->lock->pool.index;
__kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
+ KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
} else {
- lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
+ idx = __kmp_i_lock_table.next;
+ // Check capacity and double the size if it is full
+ if (idx == __kmp_i_lock_table.size) {
+ // Double up the space for block pointers
+ int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
+ kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
+ __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
+ KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
+ __kmp_free(old_table);
+ // Allocate new objects in the new blocks
+ for (int i = row; i < 2*row; ++i)
+ *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
+ __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+ __kmp_i_lock_table.size = 2*idx;
+ }
+ __kmp_i_lock_table.next++;
+ lck = KMP_GET_I_LOCK(idx);
+ // Allocate a new base lock object
lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
- if (OMP_LOCK_T_SIZE < sizeof(void *))
- idx = __kmp_insert_indirect_lock(lck);
+ KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
}
__kmp_release_lock(&__kmp_global_lock, gtid);
@@ -3286,10 +3358,10 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func)
}
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
- if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
+ if (idx < 0 || idx >= __kmp_i_lock_table.size) {
KMP_FATAL(LockIsUninitialized, func);
}
- lck = __kmp_indirect_lock_table[idx];
+ lck = KMP_GET_I_LOCK(idx);
} else {
lck = *((kmp_indirect_lock_t **)user_lock);
}
@@ -3299,7 +3371,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func)
return lck;
} else {
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- return __kmp_indirect_lock_table[KMP_EXTRACT_I_INDEX(user_lock)];
+ return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
} else {
return *((kmp_indirect_lock_t **)user_lock);
}
@@ -3315,10 +3387,15 @@ __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
seq = lockseq_queuing;
}
#endif
+#if KMP_USE_TSX
+ if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
+ seq = lockseq_queuing;
+ }
+#endif
kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
KMP_I_LOCK_FUNC(l, init)(l->lock);
- KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
+ KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
}
static void
@@ -3395,7 +3472,7 @@ __kmp_init_lock_hinted(void **lock, int hint)
seq = lockseq_tas;
break;
case kmp_lock_hint_speculative:
-#if KMP_HAS_HLE
+#if KMP_USE_TSX
seq = lockseq_hle;
#else
seq = lockseq_tas;
@@ -3408,6 +3485,14 @@ __kmp_init_lock_hinted(void **lock, int hint)
seq = lockseq_queuing;
#endif
break;
+#if KMP_USE_TSX
+ case kmp_lock_hint_hle:
+ seq = lockseq_hle;
+ break;
+ case kmp_lock_hint_rtm:
+ seq = lockseq_rtm;
+ break;
+#endif
// Defaults to queuing locks.
case kmp_lock_hint_contended:
case kmp_lock_hint_nonspeculative:
@@ -3474,7 +3559,6 @@ __kmp_init_nest_lock_hinted(void **lock, int hint)
case kmp_lock_hint_nonspeculative:
default:
seq = lockseq_nested_queuing;
- break;
}
KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
@@ -3483,27 +3567,6 @@ __kmp_init_nest_lock_hinted(void **lock, int hint)
#endif
}
-#if KMP_USE_ADAPTIVE_LOCKS
-# define init_lock_func(table, expand) { \
- table[locktag_ticket] = expand(ticket); \
- table[locktag_queuing] = expand(queuing); \
- table[locktag_adaptive] = expand(queuing); \
- table[locktag_drdpa] = expand(drdpa); \
- table[locktag_nested_ticket] = expand(ticket); \
- table[locktag_nested_queuing] = expand(queuing); \
- table[locktag_nested_drdpa] = expand(drdpa); \
-}
-#else
-# define init_lock_func(table, expand) { \
- table[locktag_ticket] = expand(ticket); \
- table[locktag_queuing] = expand(queuing); \
- table[locktag_drdpa] = expand(drdpa); \
- table[locktag_nested_ticket] = expand(ticket); \
- table[locktag_nested_queuing] = expand(queuing); \
- table[locktag_nested_drdpa] = expand(drdpa); \
-}
-#endif // KMP_USE_ADAPTIVE_LOCKS
-
// Initializes data for dynamic user locks.
void
__kmp_init_dynamic_user_locks()
@@ -3527,24 +3590,62 @@ __kmp_init_dynamic_user_locks()
}
// Initialize lock index table
- __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
- __kmp_indirect_lock_table_size = 1024;
- __kmp_indirect_lock_table_next = 0;
+ __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
+ __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+ *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
+ __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
+ __kmp_i_lock_table.next = 0;
+
+ // Indirect lock size
+ __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
+ __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
+#if KMP_USE_ADAPTIVE_LOCKS
+ __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
+#if KMP_USE_TSX
+ __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
+#if KMP_USE_FUTEX
+ __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
+#endif
+ __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
+ __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
+ __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
// Initialize lock accessor/modifier
- // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
-#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
- init_lock_func(__kmp_indirect_set_location, expand_func);
-#undef expand_func
-#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
- init_lock_func(__kmp_indirect_set_flags, expand_func);
-#undef expand_func
-#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
- init_lock_func(__kmp_indirect_get_location, expand_func);
-#undef expand_func
-#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
- init_lock_func(__kmp_indirect_get_flags, expand_func);
-#undef expand_func
+#define fill_jumps(table, expand, sep) { \
+ table[locktag##sep##ticket] = expand(ticket); \
+ table[locktag##sep##queuing] = expand(queuing); \
+ table[locktag##sep##drdpa] = expand(drdpa); \
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+# define fill_table(table, expand) { \
+ fill_jumps(table, expand, _); \
+ table[locktag_adaptive] = expand(queuing); \
+ fill_jumps(table, expand, _nested_); \
+}
+#else
+# define fill_table(table, expand) { \
+ fill_jumps(table, expand, _); \
+ fill_jumps(table, expand, _nested_); \
+}
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
+ fill_table(__kmp_indirect_set_location, expand);
+#undef expand
+#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
+ fill_table(__kmp_indirect_set_flags, expand);
+#undef expand
+#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
+ fill_table(__kmp_indirect_get_location, expand);
+#undef expand
+#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
+ fill_table(__kmp_indirect_get_flags, expand);
+#undef expand
__kmp_init_user_locks = TRUE;
}
@@ -3562,25 +3663,25 @@ __kmp_cleanup_indirect_user_locks()
while (l != NULL) {
kmp_indirect_lock_t *ll = l;
l = (kmp_indirect_lock_t *)l->lock->pool.next;
- if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
- }
+ KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
__kmp_free(ll->lock);
- __kmp_free(ll);
+ ll->lock = NULL;
}
}
// Clean up the remaining undestroyed locks.
- for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
- kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
- if (l != NULL) {
+ for (i = 0; i < __kmp_i_lock_table.next; i++) {
+ kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
+ if (l->lock != NULL) {
// Locks not destroyed explicitly need to be destroyed here.
KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+ KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
__kmp_free(l->lock);
- __kmp_free(l);
}
}
// Free the table
- __kmp_free(__kmp_indirect_lock_table);
+ for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
+ __kmp_free(__kmp_i_lock_table.table[i]);
+ __kmp_free(__kmp_i_lock_table.table);
__kmp_init_user_locks = FALSE;
}
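For context on how the hinted-lock dispatch touched by this patch is reached from user code, below is a minimal usage sketch. It is not part of the patch; it assumes an OpenMP 4.5 compiler and runtime where the standard omp_init_lock_with_hint() entry point declared in omp.h ends up in the dynamic-lock path shown above (__kmp_init_lock_hinted). The kmp_lock_hint_hle / kmp_lock_hint_rtm values added in this patch are runtime extensions beyond the standard omp_lock_hint_* set.

/* Illustrative only -- not part of this commit. Build with an OpenMP 4.5
 * compiler, e.g. -fopenmp. */
#include <omp.h>

int main(void) {
    omp_lock_t l;
    /* Ask for a speculative lock; on TSX-capable hardware the runtime may
     * choose an HLE/RTM-based lock, otherwise it silently falls back to a
     * non-speculative implementation (e.g. TAS or queuing). */
    omp_init_lock_with_hint(&l, omp_lock_hint_speculative);

    int counter = 0;
    #pragma omp parallel for
    for (int i = 0; i < 1000; ++i) {
        omp_set_lock(&l);
        ++counter;              /* short critical section favors speculation */
        omp_unset_lock(&l);
    }

    omp_destroy_lock(&l);
    return counter == 1000 ? 0 : 1;
}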