[libomptarget][CUDA] Use cuDeviceGetAttribute, NFCI.

cuDeviceGetProperties has apparently been deprecated since CUDA 5.0. Nvidia started using annotations only in CUDA 9.2, so nobody noticed nor cared before. The new function returns the same values, tested with a P100. Differential Revision: https://reviews.llvm.org/D51624 llvm-svn: 341372
author: Jonas Hahnfeld <hahnjo@hahnjo.de> 2018-09-04 15:13:28 +0000
committer: Jonas Hahnfeld <hahnjo@hahnjo.de> 2018-09-04 15:13:28 +0000
commit: bb51d398713a11cf407cf16eda54dc6702b832cf (patch)
tree: 8e3d09880f7f93533621e95035277722ddd9e26b /openmp/libomptarget/plugins
parent: f7f86971e699fd8c860daf88ed06faf670b3258c (diff)
download: bcm5719-llvm-bb51d398713a11cf407cf16eda54dc6702b832cf.tar.gz
bcm5719-llvm-bb51d398713a11cf407cf16eda54dc6702b832cf.zip
1 files changed, 37 insertions, 32 deletions
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 872e7f02aed..d265a87b83d 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -285,43 +285,48 @@ int32_t __tgt_rtl_init_device(int32_t device_id) {
     return OFFLOAD_FAIL;
   }
 
-  // scan properties to determine number of threads/block and blocks/grid.
-  CUdevprop Properties;
-  err = cuDeviceGetProperties(&Properties, cuDevice);
+  // Query attributes to determine number of threads/block and blocks/grid.
+  int maxGridDimX;
+  err = cuDeviceGetAttribute(&maxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
+                             cuDevice);
   if (err != CUDA_SUCCESS) {
-    DP("Error getting device Properties, use defaults\n");
+    DP("Error getting max grid dimension, use default\n");
     DeviceInfo.BlocksPerGrid[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
-    DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads;
-    DeviceInfo.WarpSize[device_id] = 32;
+  } else if (maxGridDimX <= RTLDeviceInfoTy::HardTeamLimit) {
+    DeviceInfo.BlocksPerGrid[device_id] = maxGridDimX;
+    DP("Using %d CUDA blocks per grid\n", maxGridDimX);
   } else {
-    // Get blocks per grid
-    if (Properties.maxGridSize[0] <= RTLDeviceInfoTy::HardTeamLimit) {
-      DeviceInfo.BlocksPerGrid[device_id] = Properties.maxGridSize[0];
-      DP("Using %d CUDA blocks per grid\n", Properties.maxGridSize[0]);
-    } else {
-      DeviceInfo.BlocksPerGrid[device_id] = RTLDeviceInfoTy::HardTeamLimit;
-      DP("Max CUDA blocks per grid %d exceeds the hard team limit %d, capping "
-          "at the hard limit\n", Properties.maxGridSize[0],
-          RTLDeviceInfoTy::HardTeamLimit);
-    }
+    DeviceInfo.BlocksPerGrid[device_id] = RTLDeviceInfoTy::HardTeamLimit;
+    DP("Max CUDA blocks per grid %d exceeds the hard team limit %d, capping "
+       "at the hard limit\n",
+       maxGridDimX, RTLDeviceInfoTy::HardTeamLimit);
+  }
 
-    // Get threads per block, exploit threads only along x axis
-    if (Properties.maxThreadsDim[0] <= RTLDeviceInfoTy::HardThreadLimit) {
-      DeviceInfo.ThreadsPerBlock[device_id] = Properties.maxThreadsDim[0];
-      DP("Using %d CUDA threads per block\n", Properties.maxThreadsDim[0]);
-      if (Properties.maxThreadsDim[0] < Properties.maxThreadsPerBlock) {
-        DP("(fewer than max per block along all xyz dims %d)\n",
-            Properties.maxThreadsPerBlock);
-      }
-    } else {
-      DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit;
-      DP("Max CUDA threads per block %d exceeds the hard thread limit %d, "
-          "capping at the hard limit\n", Properties.maxThreadsDim[0],
-          RTLDeviceInfoTy::HardThreadLimit);
-    }
+  // We are only exploiting threads along the x axis.
+  int maxBlockDimX;
+  err = cuDeviceGetAttribute(&maxBlockDimX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
+                             cuDevice);
+  if (err != CUDA_SUCCESS) {
+    DP("Error getting max block dimension, use default\n");
+    DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads;
+  } else if (maxBlockDimX <= RTLDeviceInfoTy::HardThreadLimit) {
+    DeviceInfo.ThreadsPerBlock[device_id] = maxBlockDimX;
+    DP("Using %d CUDA threads per block\n", maxBlockDimX);
+  } else {
+    DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::HardThreadLimit;
+    DP("Max CUDA threads per block %d exceeds the hard thread limit %d, capping"
+       "at the hard limit\n",
+       maxBlockDimX, RTLDeviceInfoTy::HardThreadLimit);
+  }
 
-    // According to the documentation, SIMDWidth is "Warp size in threads".
-    DeviceInfo.WarpSize[device_id] = Properties.SIMDWidth;
+  int warpSize;
+  err =
+      cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, cuDevice);
+  if (err != CUDA_SUCCESS) {
+    DP("Error getting warp size, assume default\n");
+    DeviceInfo.WarpSize[device_id] = 32;
+  } else {
+    DeviceInfo.WarpSize[device_id] = warpSize;
   }
 
   // Adjust teams to the env variables
author	Jonas Hahnfeld <hahnjo@hahnjo.de>	2018-09-04 15:13:28 +0000
committer	Jonas Hahnfeld <hahnjo@hahnjo.de>	2018-09-04 15:13:28 +0000
commit	bb51d398713a11cf407cf16eda54dc6702b832cf (patch)
tree	8e3d09880f7f93533621e95035277722ddd9e26b /openmp/libomptarget/plugins
parent	f7f86971e699fd8c860daf88ed06faf670b3258c (diff)
download	bcm5719-llvm-bb51d398713a11cf407cf16eda54dc6702b832cf.tar.gz bcm5719-llvm-bb51d398713a11cf407cf16eda54dc6702b832cf.zip