summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu4
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/critical.cu10
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/interface.h158
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu107
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu32
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu9
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h55
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu30
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/task.cu28
9 files changed, 262 insertions, 171 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
index 77033db2d46..ccfb7ad86a4 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
@@ -13,14 +13,14 @@
#include "omptarget-nvptx.h"
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", cancelVal);
// disabled
return FALSE;
}
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", cancelVal);
// disabled
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/critical.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/critical.cu
index 2b92b9a1ca8..9bf2a30ddcd 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/critical.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/critical.cu
@@ -15,14 +15,16 @@
#include "omptarget-nvptx.h"
-EXTERN void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
- kmp_CriticalName *lck) {
+EXTERN
+void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
+ kmp_CriticalName *lck) {
PRINT0(LD_IO, "call to kmpc_critical()\n");
omp_set_lock((omp_lock_t *)lck);
}
-EXTERN void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
- kmp_CriticalName *lck) {
+EXTERN
+void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
+ kmp_CriticalName *lck) {
PRINT0(LD_IO, "call to kmpc_end_critical()\n");
omp_unset_lock((omp_lock_t *)lck);
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
index 7a37c04c58e..4414ea43b99 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -160,8 +160,36 @@ typedef enum kmp_sched_t {
} kmp_sched_t;
+/*!
+ * Enum for accessing the reserved_2 field of the ident_t struct below.
+ */
+enum {
+ /*! Bit set to 1 when in SPMD mode. */
+ KMP_IDENT_SPMD_MODE = 0x01,
+ /*! Bit set to 1 when a simplified runtime is used. */
+ KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+};
+
+/*!
+ * The ident structure that describes a source location.
+ * The struct is identical to the one in the kmp.h file.
+ * We maintain the same data structure for compatibility.
+ */
+typedef int kmp_int32;
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
+ identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields
+ which describe the source file, the function and a pair
+ of line numbers that delimit the construct. */
+} ident_t;
+
// parallel defs
-typedef void kmp_Indent;
+typedef ident_t kmp_Ident;
typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...);
typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData);
typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num);
@@ -223,28 +251,28 @@ typedef int32_t kmp_CriticalName[8];
////////////////////////////////////////////////////////////////////////////////
// query
-EXTERN int32_t __kmpc_global_num_threads(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_bound_thread_num(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_bound_num_threads(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_in_parallel(kmp_Indent *loc); // missing
+EXTERN int32_t __kmpc_global_num_threads(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_bound_thread_num(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_bound_num_threads(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_in_parallel(kmp_Ident *loc); // missing
// parallel
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc);
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc);
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid,
int32_t num_threads);
// simd
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid,
int32_t simd_limit);
// aee ... not supported
-// EXTERN void __kmpc_fork_call(kmp_Indent *loc, int32_t argc, kmp_ParFctPtr
+// EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr
// microtask, ...);
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
uint32_t global_tid);
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid);
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid);
// proc bind
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid,
int proc_bind);
EXTERN int omp_get_num_places(void);
EXTERN int omp_get_place_num_procs(int place_num);
@@ -254,52 +282,52 @@ EXTERN int omp_get_partition_num_places(void);
EXTERN void omp_get_partition_place_nums(int *place_nums);
// for static (no chunk or chunk)
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc,
int32_t global_tid, int32_t sched,
int32_t *plastiter,
int32_t *plower, int32_t *pupper,
@@ -307,11 +335,11 @@ void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc,
int32_t chunk);
EXTERN
void __kmpc_for_static_init_4u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_8_simple_generic(kmp_Ident *loc,
int32_t global_tid, int32_t sched,
int32_t *plastiter,
int64_t *plower, int64_t *pupper,
@@ -319,48 +347,48 @@ void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc,
int64_t chunk);
EXTERN
void __kmpc_for_static_init_8u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
+ kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk);
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid);
// for dynamic
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t lower, int32_t upper,
int32_t incr, int32_t chunk);
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, uint32_t lower,
uint32_t upper, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int64_t lower, int64_t upper,
int64_t incr, int64_t chunk);
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, uint64_t lower,
uint64_t upper, int64_t incr,
int64_t chunk);
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, int32_t *plower,
int32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, uint32_t *plower,
uint32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, int64_t *plower,
int64_t *pupper, int64_t *pstride);
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, uint64_t *plower,
uint64_t *pupper, int64_t *pstride);
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid);
// Support for reducing conditional lastprivate variables
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc,
int32_t global_tid,
int32_t varNum, void *array);
@@ -395,63 +423,63 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
// sync barrier
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid);
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
// single
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid);
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid);
// sync
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit);
-EXTERN void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit);
-EXTERN void __kmpc_flush(kmp_Indent *loc);
+EXTERN void __kmpc_flush(kmp_Ident *loc);
// vote
EXTERN int32_t __kmpc_warp_active_thread_mask();
// tasks
-EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Indent *loc,
+EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
uint32_t global_tid, int32_t flag,
size_t sizeOfTaskInclPrivate,
size_t sizeOfSharedTable,
kmp_TaskFctPtr sub);
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList);
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList);
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
int end_part);
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr, int if_val,
uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
int32_t sched, uint64_t grainsize, void *task_dup);
// cancel
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal);
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal);
// non standard
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
index bd84f0fad13..3d409d82f85 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -238,10 +238,10 @@ public:
schedule <= kmp_sched_ordered_last;
}
- INLINE static void dispatch_init(kmp_Indent *loc, int32_t threadId,
+ INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId,
kmp_sched_t schedule, T lb, T ub, ST st,
ST chunk) {
- ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Expected non-SPMD mode + initialized runtime.");
int tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
@@ -249,8 +249,9 @@ public:
T tripCount = ub - lb + 1; // +1 because ub is inclusive
ASSERT0(
LT_FUSSY,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) <
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ GetOmpThreadId(tid, checkSPMDMode(loc), checkRuntimeUninitialized(loc)) <
+ GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)),
"current thread is not needed here; error");
/* Currently just ignore the monotonic and non-monotonic modifiers
@@ -321,7 +322,8 @@ public:
int lastiter = 0;
ForStaticChunk(
lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ GetOmpThreadId(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)), tnum);
// save computed params
omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -329,7 +331,8 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
omptarget_nvptx_threadPrivateContext->Stride(tid));
@@ -350,7 +353,8 @@ public:
T oldUb = ub;
ForStaticChunk(
lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ GetOmpThreadId(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)), tnum);
ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
if (ub > oldUb)
ub = oldUb;
@@ -361,7 +365,8 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
omptarget_nvptx_threadPrivateContext->Stride(tid));
@@ -376,7 +381,8 @@ public:
int lastiter = 0;
ForStaticNoChunk(
lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ GetOmpThreadId(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)), tnum);
// save computed params
omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -384,7 +390,8 @@ public:
PRINT(LD_LOOP,
"dispatch init (static nochunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
omptarget_nvptx_threadPrivateContext->Stride(tid));
@@ -405,7 +412,8 @@ public:
PRINT(LD_LOOP,
"dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
", chunk %" PRIu64 "\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc)),
omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId),
omptarget_nvptx_threadPrivateContext->Chunk(teamId));
@@ -538,7 +546,7 @@ public:
////////////////////////////////////////////////////////////////////////////////
// init
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t tid,
int32_t schedule, int32_t lb, int32_t ub,
int32_t st, int32_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_4\n");
@@ -546,7 +554,7 @@ EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t tid,
int32_t schedule, uint32_t lb, uint32_t ub,
int32_t st, int32_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_4u\n");
@@ -554,7 +562,7 @@ EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t tid,
int32_t schedule, int64_t lb, int64_t ub,
int64_t st, int64_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_8\n");
@@ -562,7 +570,7 @@ EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t tid,
int32_t schedule, uint64_t lb, uint64_t ub,
int64_t st, int64_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_8u\n");
@@ -571,14 +579,14 @@ EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid,
}
// next
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t tid, int32_t *p_last,
int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_4\n");
return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t tid,
int32_t *p_last, uint32_t *p_lb,
uint32_t *p_ub, int32_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_4u\n");
@@ -586,14 +594,14 @@ EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t tid,
p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t tid, int32_t *p_last,
int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_8\n");
return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t tid,
int32_t *p_last, uint64_t *p_lb,
uint64_t *p_ub, int64_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_8u\n");
@@ -602,22 +610,22 @@ EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t tid,
}
// fini
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
}
@@ -626,52 +634,52 @@ EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) {
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
@@ -684,7 +692,7 @@ void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
}
EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype,
int32_t *plastiter, uint32_t *plower,
uint32_t *pupper, int32_t *pstride,
@@ -697,7 +705,7 @@ void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
}
EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
@@ -710,7 +718,7 @@ void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
}
EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype,
int32_t *plastiter, uint64_t *plower,
uint64_t *pupper, int64_t *pstride,
@@ -724,7 +732,7 @@ void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
EXTERN
void __kmpc_for_static_init_4_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n");
@@ -736,7 +744,7 @@ void __kmpc_for_static_init_4_simple_generic(
EXTERN
void __kmpc_for_static_init_4u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n");
@@ -748,7 +756,7 @@ void __kmpc_for_static_init_4u_simple_generic(
EXTERN
void __kmpc_for_static_init_8_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n");
@@ -760,7 +768,7 @@ void __kmpc_for_static_init_8_simple_generic(
EXTERN
void __kmpc_for_static_init_8u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n");
@@ -770,7 +778,7 @@ void __kmpc_for_static_init_8u_simple_generic(
/*IsRuntimeUninitialized=*/true);
}
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_for_static_fini\n");
}
@@ -792,17 +800,18 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
}
}; // namespace
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc, int32_t gtid,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
int32_t varNum, void *array) {
PRINT0(LD_IO, "call to __kmpc_reduce_conditional_lastprivate(...)\n");
- ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Expected non-SPMD mode + initialized runtime.");
omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
- int tid = GetOmpThreadId(GetLogicalThreadIdInBlock(), isSPMDMode(),
- isRuntimeUninitialized());
+ int tid = GetOmpThreadId(GetLogicalThreadIdInBlock(), checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
uint32_t NumThreads = GetNumberOfOmpThreads(
- GetLogicalThreadIdInBlock(), isSPMDMode(), isRuntimeUninitialized());
+ GetLogicalThreadIdInBlock(), checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
uint64_t *Buffer = teamDescr.getLastprivateIterBuffer();
for (unsigned i = 0; i < varNum; i++) {
// Reset buffer.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 13e64e44ac5..0825564ca09 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -332,11 +332,11 @@ EXTERN void __kmpc_kernel_end_parallel() {
// support for parallel that goes sequential
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_serialized_parallel\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
omptarget_nvptx_simpleThreadPrivateContext->IncParLevel();
return;
@@ -370,12 +370,12 @@ EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) {
newTaskDescr);
}
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_end_serialized_parallel\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
omptarget_nvptx_simpleThreadPrivateContext->DecParLevel();
return;
@@ -393,11 +393,11 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
currTaskDescr->RestoreLoopData();
}
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_parallel_level\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
}
@@ -417,7 +417,7 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {
// cached by the compiler and used when calling the runtime. On nvptx
// it's cheap to recalculate this value so we never use the result
// of this call.
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc) {
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
return GetLogicalThreadIdInBlock();
}
@@ -425,19 +425,19 @@ EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc) {
// push params
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
int32_t num_threads) {
PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
num_threads;
}
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid,
int32_t simd_limit) {
PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", simd_limit);
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_threadPrivateContext->SimdLimitForNextSimd(tid) = simd_limit;
}
@@ -445,14 +445,14 @@ EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid,
// Do nothing. The host guarantees we started the requested number of
// teams and we only need inspection of gridDim.
-EXTERN void __kmpc_push_num_teams(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid,
int32_t num_teams, int32_t thread_limit) {
PRINT(LD_IO, "call kmpc_push_num_teams %d\n", num_teams);
ASSERT0(LT_FUSSY, FALSE,
"should never have anything with new teams on device");
}
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t tid,
int proc_bind) {
PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", proc_bind);
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 21a419ce14c..2a2edcef03d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -31,7 +31,7 @@ int32_t __gpu_block_reduce() {
}
EXTERN
-int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
+int32_t __kmpc_reduce_gpu(kmp_Ident *loc, int32_t global_tid, int32_t num_vars,
size_t reduce_size, void *reduce_data,
void *reduce_array_size, kmp_ReductFctPtr *reductFct,
kmp_CriticalName *lck) {
@@ -40,7 +40,8 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
int numthread;
if (currTaskDescr->IsParallelConstruct()) {
numthread =
- GetNumberOfOmpThreads(threadId, isSPMDMode(), isRuntimeUninitialized());
+ GetNumberOfOmpThreads(threadId, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
} else {
numthread = GetNumberOfOmpTeams();
}
@@ -55,12 +56,12 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
}
EXTERN
-int32_t __kmpc_reduce_combined(kmp_Indent *loc) {
+int32_t __kmpc_reduce_combined(kmp_Ident *loc) {
return threadIdx.x == 0 ? 2 : 0;
}
EXTERN
-int32_t __kmpc_reduce_simd(kmp_Indent *loc) {
+int32_t __kmpc_reduce_simd(kmp_Ident *loc) {
return (threadIdx.x % 32 == 0) ? 1 : 0;
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
index c93657e45e1..c5d2e91abc7 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -33,6 +33,59 @@ INLINE bool isRuntimeInitialized() {
}
////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+INLINE bool checkSPMDMode(kmp_Ident *loc) {
+ if (!loc)
+ return isSPMDMode();
+
+ // If SPMD is true then we are not in the UNDEFINED state so
+ // we can return immediately.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+ // Not being in SPMD mode while requiring the full runtime is a valid
+ // combination of flags, so we can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+ // We are in an undefined state.
+ return isSPMDMode();
+}
+
+INLINE bool checkGenericMode(kmp_Ident *loc) {
+ return !checkSPMDMode(loc);
+}
+
+INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+ if (!loc)
+ return isRuntimeUninitialized();
+
+ // If runtime is required then we know we can't be
+ // in the undefined mode. We can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+ // Since the simple runtime is in use we still need to check whether
+ // we are in SPMD mode or not. If not in SPMD mode then we end
+ // up in the UNDEFINED state that marks the orphaned
+ // functions.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+ // Check if we are in an UNDEFINED state. Undefined is denoted by
+ // non-SPMD + noRuntimeRequired which is a combination that
+ // cannot actually happen. The undefined state is used to mark orphaned
+ // functions.
+ return isRuntimeUninitialized();
+}
+
+INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
+ return !checkRuntimeUninitialized(loc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
// support: get info from machine
////////////////////////////////////////////////////////////////////////////////
@@ -78,8 +131,6 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
// id is GetMasterThreadID()) calls this routine, we return 0 because
// it is a shadow for the first worker.
INLINE int GetLogicalThreadIdInBlock() {
- // return GetThreadIdInBlock() % GetMasterThreadID();
-
// Implemented using control flow (predication) instead of with a modulo
// operation.
int tid = GetThreadIdInBlock();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 0a99405e778..eb4ef00b4d7 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -17,11 +17,11 @@
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_ordered\n");
}
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
@@ -33,16 +33,16 @@ EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) {
// FIXME: what if not all threads (warps) participate to the barrier?
// We may need to implement it differently
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
__kmpc_barrier(loc_ref, tid);
PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
return 0;
}
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
+ if (checkRuntimeUninitialized(loc_ref)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
"Expected SPMD mode with uninitialized runtime.");
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
@@ -50,9 +50,9 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
omptarget_nvptx_TaskDescr *currTaskDescr =
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
- tid, isSPMDMode(), /*isRuntimeUninitialized=*/false);
+ tid, checkSPMDMode(loc_ref), /*isRuntimeUninitialized=*/false);
if (numberOfActiveOMPThreads > 1) {
- if (isSPMDMode()) {
+ if (checkSPMDMode(loc_ref)) {
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
// The #threads parameter must be rounded up to the WARPSIZE.
@@ -72,7 +72,7 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
// Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
__syncthreads();
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
@@ -80,7 +80,7 @@ EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) {
// Emit a simple barrier call in Generic mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE;
// The #threads parameter must be rounded up to the WARPSIZE.
int threads =
@@ -106,12 +106,12 @@ INLINE int32_t IsMaster() {
return IsTeamMaster(ompThreadId);
}
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_master\n");
return IsMaster();
}
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_end_master\n");
ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
}
@@ -120,13 +120,13 @@ EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid) {
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_single\n");
// decide to implement single with master; master get the single
return IsMaster();
}
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_end_single\n");
// decide to implement single with master: master get the single
ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
@@ -137,7 +137,7 @@ EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) {
// Flush
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_flush(kmp_Indent *loc) {
+EXTERN void __kmpc_flush(kmp_Ident *loc) {
PRINT0(LD_IO, "call kmpc_flush\n");
__threadfence_block();
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
index f0431abf24d..3e9b304ec40 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
@@ -31,7 +31,7 @@
#include "omptarget-nvptx.h"
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
- kmp_Indent *loc, // unused
+ kmp_Ident *loc, // unused
uint32_t global_tid, // unused
int32_t flag, // unused (because in our impl, all are immediately exec
size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
@@ -68,20 +68,20 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
return newKmpTaskDescr;
}
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0,
0);
}
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList) {
PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -114,11 +114,11 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
return 0;
}
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -139,11 +139,11 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
// 4 & 5 ... done in complete
}
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -164,37 +164,37 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
SafeFree(newExplicitTaskDescr, "explicit task descriptor");
}
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList) {
PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
int end_part) {
PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
// do nothing: tasks are executed immediately, no yielding allowed
return 0;
}
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
// nothing to do as all our tasks are executed as final
return 0;
}
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr, int if_val,
uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
int32_t sched, uint64_t grainsize, void *task_dup) {
OpenPOWER on IntegriCloud