diff options
Diffstat (limited to 'parallel-libs/streamexecutor/lib/platforms')
4 files changed, 0 insertions, 380 deletions
diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt deleted file mode 100644 index 0802c059add..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) - add_subdirectory(cuda) -endif() diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt deleted file mode 100644 index 5be76d1c75a..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library( - streamexecutor_cuda_platform - OBJECT - CUDAPlatform.cpp - CUDAPlatformDevice.cpp) diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp deleted file mode 100644 index 9f9e4388647..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDA platform internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#include "streamexecutor/Device.h" -#include "streamexecutor/Platform.h" -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" - -#include "llvm/Support/Mutex.h" - -#include "cuda.h" - -#include <map> - -namespace streamexecutor { -namespace cuda { - -static CUresult ensureCUDAInitialized() { - static CUresult InitResult = []() { return cuInit(0); }(); - return InitResult; -} - -size_t CUDAPlatform::getDeviceCount() const { - if (ensureCUDAInitialized()) - // TODO(jhen): Log an error. - return 0; - - int DeviceCount = 0; - CUresult Result = cuDeviceGetCount(&DeviceCount); - (void)Result; - // TODO(jhen): Log an error. - - return DeviceCount; -} - -Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) { - if (CUresult InitResult = ensureCUDAInitialized()) - return CUresultToError(InitResult, "cached cuInit return value"); - - llvm::sys::ScopedLock Lock(Mutex); - auto Iterator = PlatformDevices.find(DeviceIndex); - if (Iterator == PlatformDevices.end()) { - if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) { - Iterator = - PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first; - } else { - return MaybePDevice.takeError(); - } - } - return Device(&Iterator->second); -} - -} // namespace cuda -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp deleted file mode 100644 index 5284a9a0a35..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDAPlatformDevice. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" -#include "streamexecutor/PlatformDevice.h" - -#include "cuda.h" - -namespace streamexecutor { -namespace cuda { - -static void *offset(const void *Base, size_t Offset) { - return const_cast<char *>(static_cast<const char *>(Base) + Offset); -} - -Error CUresultToError(int CUResult, const llvm::Twine &Message) { - CUresult Result = static_cast<CUresult>(CUResult); - if (Result) { - const char *ErrorName; - if (cuGetErrorName(Result, &ErrorName)) - ErrorName = "UNKNOWN ERROR NAME"; - const char *ErrorString; - if (cuGetErrorString(Result, &ErrorString)) - ErrorString = "UNKNOWN ERROR DESCRIPTION"; - return make_error("CUDA driver error: '" + Message + "', error code = " + - llvm::Twine(static_cast<int>(Result)) + ", name = " + - ErrorName + ", description = '" + ErrorString + "'"); - } else - return Error::success(); -} - -std::string CUDAPlatformDevice::getName() const { - static std::string CachedName = [](int DeviceIndex) { - static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024; - std::string Name = "CUDA device " + std::to_string(DeviceIndex); - char NameFromDriver[MAX_DRIVER_NAME_BYTES]; - if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1, - DeviceIndex)) { - NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0'; - Name.append(": ").append(NameFromDriver); - } - return Name; - }(DeviceIndex); - return CachedName; -} - -Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) { - CUdevice DeviceHandle; - if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex)) - return CUresultToError(Result, "cuDeviceGet"); - - CUcontext ContextHandle; - if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle)) - return CUresultToError(Result, "cuDevicePrimaryCtxRetain"); - - if (CUresult Result = cuCtxSetCurrent(ContextHandle)) - return CUresultToError(Result, "cuCtxSetCurrent"); - - return CUDAPlatformDevice(DeviceIndex); -} - -CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept - : DeviceIndex(Other.DeviceIndex) { - Other.DeviceIndex = -1; -} - -CUDAPlatformDevice &CUDAPlatformDevice:: -operator=(CUDAPlatformDevice &&Other) noexcept { - DeviceIndex = Other.DeviceIndex; - Other.DeviceIndex = -1; - return *this; -} - -CUDAPlatformDevice::~CUDAPlatformDevice() { - CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex); - (void)Result; - // TODO(jhen): Log error. -} - -Expected<const void *> -CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) { - // TODO(jhen): Maybe first check loaded modules? - if (!Spec.hasCUDAPTXInMemory()) - return make_error("no CUDA code available to create kernel"); - - CUdevice Device = static_cast<int>(DeviceIndex); - int ComputeCapabilityMajor = 0; - int ComputeCapabilityMinor = 0; - if (CUresult Result = cuDeviceGetAttribute( - &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - Device)) - return CUresultToError( - Result, - "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"); - if (CUresult Result = cuDeviceGetAttribute( - &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - Device)) - return CUresultToError( - Result, - "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"); - const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor, - ComputeCapabilityMinor); - - if (!Code) - return make_error("no suitable CUDA source found for compute capability " + - llvm::Twine(ComputeCapabilityMajor) + "." + - llvm::Twine(ComputeCapabilityMinor)); - - CUmodule Module; - if (CUresult Result = cuModuleLoadData(&Module, Code)) - return CUresultToError(Result, "cuModuleLoadData"); - - CUfunction Function; - if (CUresult Result = - cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str())) - return CUresultToError(Result, "cuModuleGetFunction"); - - // TODO(jhen): Should I save this function pointer in case someone asks for - // it again? - - // TODO(jhen): Should I save the module pointer so I can unload it when I - // destroy this device? - - return static_cast<const void *>(Function); -} - -Error CUDAPlatformDevice::destroyKernel(const void *Handle) { - // TODO(jhen): Maybe keep track of kernels for each module and unload the - // module after they are all destroyed. - return Error::success(); -} - -Expected<const void *> CUDAPlatformDevice::createStream() { - CUstream Stream; - if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT)) - return CUresultToError(Result, "cuStreamCreate"); - return Stream; -} - -Error CUDAPlatformDevice::destroyStream(const void *Handle) { - return CUresultToError( - cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))), - "cuStreamDestroy"); -} - -Error CUDAPlatformDevice::launch( - const void *PlatformStreamHandle, BlockDimensions BlockSize, - GridDimensions GridSize, const void *PKernelHandle, - const PackedKernelArgumentArrayBase &ArgumentArray) { - CUfunction Function = - reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle)); - CUstream Stream = - reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle)); - - auto Launch = [Function, Stream, BlockSize, - GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) { - return CUresultToError( - cuLaunchKernel(Function, // - GridSize.X, GridSize.Y, GridSize.Z, // - BlockSize.X, BlockSize.Y, BlockSize.Z, // - SharedMemoryBytes, Stream, ArgumentAddresses, nullptr), - "cuLaunchKernel"); - }; - - void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses()); - size_t SharedArgumentCount = ArgumentArray.getSharedCount(); - if (SharedArgumentCount) { - // The argument handling in this case is not very efficient. We may need to - // come back and optimize it later. - // - // Perhaps introduce another branch for the case where there is exactly one - // shared memory argument and it is the first one. This is the only case - // that will be used for compiler-generated CUDA kernels, and OpenCL users - // can choose to take advantage of it by combining their dynamic shared - // memory arguments and putting them first in the kernel signature. - unsigned SharedMemoryBytes = 0; - size_t ArgumentCount = ArgumentArray.getArgumentCount(); - llvm::SmallVector<void *, 16> NonSharedArgumentAddresses( - ArgumentCount - SharedArgumentCount); - size_t NonSharedIndex = 0; - for (size_t I = 0; I < ArgumentCount; ++I) - if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY) - SharedMemoryBytes += ArgumentArray.getSize(I); - else - NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I]; - return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data()); - } - return Launch(0, ArgumentAddresses); -} - -Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle, - const void *DeviceSrcHandle, - size_t SrcByteOffset, void *HostDst, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoHAsync( - offset(HostDst, DstByteOffset), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyDtoHAsync"); -} - -Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle, - const void *HostSrc, size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyHtoDAsync( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - offset(HostSrc, SrcByteOffset), ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyHtoDAsync"); -} - -Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle, - const void *DeviceSrcHandle, - size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoDAsync( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyDtoDAsync"); -} - -Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) { - return CUresultToError(cuStreamSynchronize(static_cast<CUstream>( - const_cast<void *>(PlatformStreamHandle))), - "cuStreamSynchronize"); -} - -Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) { - CUdeviceptr Pointer; - if (CUresult Result = cuMemAlloc(&Pointer, ByteCount)) - return CUresultToError(Result, "cuMemAlloc"); - return reinterpret_cast<void *>(Pointer); -} - -Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) { - return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)), - "cuMemFree"); -} - -Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) { - return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u), - "cuMemHostRegister"); -} - -Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) { - return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)), - "cuMemHostUnregister"); -} - -Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle, - size_t SrcByteOffset, - void *HostDst, - size_t DstByteOffset, - size_t ByteCount) { - return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset), - reinterpret_cast<CUdeviceptr>(offset( - DeviceSrcHandle, SrcByteOffset)), - ByteCount), - "cuMemcpyDtoH"); -} - -Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc, - size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, - size_t ByteCount) { - return CUresultToError( - cuMemcpyHtoD( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - offset(HostSrc, SrcByteOffset), ByteCount), - "cuMemcpyHtoD"); -} - -Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle, - size_t DstByteOffset, - const void *DeviceSrcHandle, - size_t SrcByteOffset, - size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoD( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount), - "cuMemcpyDtoD"); -} - -} // namespace cuda -} // namespace streamexecutor |

