summary refs log tree commit diff stats
path: root/openmp/libomptarget/plugins/cuda/src
diff options
context:
space:
mode:
author Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2019-06-04 15:05:53 +0000
committer Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2019-06-04 15:05:53 +0000
commit c5fe030c166b0fee57b7a5dfea20f24f4571fe29 (patch)
tree c6b9d25b30544a280b3eb90042d830f67f48c9c6 /openmp/libomptarget/plugins/cuda/src
parent a6e289e9f8b1a4b03c96176483e5ec478e281720 (diff)
download bcm5719-llvm-c5fe030c166b0fee57b7a5dfea20f24f4571fe29.tar.gz
bcm5719-llvm-c5fe030c166b0fee57b7a5dfea20f24f4571fe29.zip
[OpenMP][libomptarget] Enable usage of unified memory for declare target link variables
Summary:
This patch enables the usage of a host variable on the device for declare target link variables when unified memory is available.

Reviewers: ABataev, caomhin, grokos

Reviewed By: grokos

Subscribers: Hahnfeld, guansong, jdoerfert, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D60884

llvm-svn: 362505
Diffstat (limited to 'openmp/libomptarget/plugins/cuda/src')
-rw-r--r-- openmp/libomptarget/plugins/cuda/src/rtl.cpp 23
1 files changed, 23 insertions, 0 deletions
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index fc0c1ecd187..844afa107bb 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -111,6 +111,9 @@ public:
int EnvNumTeams;
int EnvTeamLimit;
+ // OpenMP Requires Flags
+ int64_t RequiresFlags;
+
//static int EnvNumThreads;
static const int HardTeamLimit = 1<<16; // 64k
static const int HardThreadLimit = 1024;
@@ -227,6 +230,9 @@ public:
} else {
EnvNumTeams = -1;
}
+
+ // Default state.
+ RequiresFlags = OMP_REQ_UNDEFINED;
}
~RTLDeviceInfoTy() {
@@ -264,6 +270,12 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
// Returns the device count stored in DeviceInfo.NumberOfDevices.
int32_t __tgt_rtl_number_of_devices() {
  return DeviceInfo.NumberOfDevices;
}
+// Record the OpenMP 'requires' flags passed in by the caller.
+//
+// Logs the value through DP, stores it in DeviceInfo.RequiresFlags, and
+// returns the same value unchanged.
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+ // int64_t is not 'long' on every target, so print it as 'long long'
+ // instead of relying on %ld matching the underlying type.
+ DP("Init requires flags to %lld\n", (long long)RequiresFlags);
+ DeviceInfo.RequiresFlags = RequiresFlags;
+ return RequiresFlags;
+}
+
int32_t __tgt_rtl_init_device(int32_t device_id) {
CUdevice cuDevice;
@@ -436,6 +448,17 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
DPxPTR(e - HostBegin), e->name, DPxPTR(cuptr));
entry.addr = (void *)cuptr;
+ if (DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+ e->flags & OMP_DECLARE_TARGET_LINK) {
+ // If unified memory is present any target link variables
+ // can access host addresses directly. There is no longer a
+ // need for device copies.
+ cuMemcpyHtoD(cuptr, e->addr, sizeof(void *));
+ DP("Copy linked variable host address (" DPxMOD ")"
+ "to device address (" DPxMOD ")\n",
+ DPxPTR(*((void**)e->addr)), DPxPTR(cuptr));
+ }
+
DeviceInfo.addOffloadEntry(device_id, entry);
continue;
OpenPOWER on IntegriCloud