summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-07-08 15:30:23 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-07-08 15:30:23 +0000
commit060921dee71500a3096f7ed5b32620b1066f5724 (patch)
tree58f4d2b507cb30770bf93c5128ecde5066eca450
parent4aa2873fdce520fdc29b68cf29034fe64dc6f0af (diff)
downloadbcm5719-llvm-060921dee71500a3096f7ed5b32620b1066f5724.tar.gz
bcm5719-llvm-060921dee71500a3096f7ed5b32620b1066f5724.zip
[OPENMP]Make __kmpc_push_tripcount thread safe.
Summary: __kmpc_push_tripcount function is not thread safe and may lead to data race when the target regions are executed in parallel threads. The patch makes loopTripCnt counter thread aware and stores the tripcount value per thread in the map. Access to map is guarded by mutex to prevent data race in the map itself. Test is for NVPTX target because it does not work correctly on the host. Seems to me, there is a problem in libomp with target regions in the parallel threads. Reviewers: grokos Subscribers: guansong, jfb, jdoerfert, openmp-commits, kkwli0, caomhin Tags: #openmp Differential Revision: https://reviews.llvm.org/D64080 llvm-svn: 365332
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c22
-rw-r--r--openmp/libomptarget/src/device.h6
-rw-r--r--openmp/libomptarget/src/interface.cpp7
-rw-r--r--openmp/libomptarget/src/omptarget.cpp8
-rw-r--r--openmp/libomptarget/src/private.h1
5 files changed, 37 insertions, 7 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c b/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c
new file mode 100644
index 00000000000..b3f87685640
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c
@@ -0,0 +1,22 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int res = 0;
+
+#pragma omp parallel num_threads(2) reduction(+:res)
+ {
+ int tid = omp_get_thread_num();
+#pragma omp target teams distribute reduction(+:res)
+ for (int i = tid; i < 2; i++)
+ ++res;
+ }
+ // The first thread makes 2 iterations, the second - 1. Expected result of the
+ // reduction res is 3.
+
+ // CHECK: res = 3.
+ printf("res = %d.\n", res);
+ return 0;
+}
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index 2e1ad712d45..f4cc8adeeac 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -96,7 +96,9 @@ struct DeviceTy {
std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx;
- uint64_t loopTripCnt;
+ // NOTE: Once libomp gains full target-task support, this state should be
+ // moved into the target task in libomp.
+ std::map<int32_t, uint64_t> loopTripCnt;
int64_t RTLRequiresFlags;
@@ -104,7 +106,7 @@ struct DeviceTy {
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
HasPendingGlobals(false), HostDataToTargetMap(),
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
- ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {}
+ ShadowMtx(), RTLRequiresFlags(0) {}
// The existence of mutexes makes DeviceTy non-copyable. We need to
// provide a copy constructor and an assignment operator explicitly.
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index d0553246378..2a98b5c976a 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -304,8 +304,6 @@ EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
arg_sizes, arg_types, team_num, thread_limit);
}
-
-// The trip count mechanism will be revised - this scheme is not thread-safe.
EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
uint64_t loop_tripcount) {
if (device_id == OFFLOAD_DEVICE_DEFAULT) {
@@ -320,5 +318,8 @@ EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
loop_tripcount);
- Devices[device_id].loopTripCnt = loop_tripcount;
+ TblMapMtx.lock();
+ Devices[device_id].loopTripCnt.emplace(__kmpc_global_thread_num(NULL),
+ loop_tripcount);
+ TblMapMtx.unlock();
}
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 8272b4ec37c..1e8f94266db 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -729,8 +729,12 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
"Size mismatch in arguments and offsets");
// Pop loop trip count
- uint64_t ltc = Device.loopTripCnt;
- Device.loopTripCnt = 0;
+ uint64_t ltc = 0;
+ TblMapMtx.lock();
+ auto I = Device.loopTripCnt.find(__kmpc_global_thread_num(NULL));
+ if (I != Device.loopTripCnt.end())
+ std::swap(ltc, I->second);
+ TblMapMtx.unlock();
// Launch device execution.
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index b48415cc61a..b406909bd54 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -65,6 +65,7 @@ extern "C" {
// functions that extract info from libomp; keep in sync
int omp_get_default_device(void) __attribute__((weak));
int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak));
+int32_t __kmpc_global_thread_num(void *) __attribute__((weak));
int __kmpc_get_target_offload(void) __attribute__((weak));
#ifdef __cplusplus
}
OpenPOWER on IntegriCloud