diff options
Diffstat (limited to 'parallel-libs/streamexecutor/lib')
16 files changed, 0 insertions, 885 deletions
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt deleted file mode 100644 index 6157654a97a..00000000000 --- a/parallel-libs/streamexecutor/lib/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -macro(add_se_library name) - add_llvm_library(${name} ${ARGN}) - set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries") -endmacro(add_se_library) - -add_subdirectory(platforms) - -add_se_library( - streamexecutor - Device.cpp - DeviceMemory.cpp - Error.cpp - HostMemory.cpp - Kernel.cpp - KernelSpec.cpp - PackedKernelArgumentArray.cpp - Platform.cpp - PlatformDevice.cpp - PlatformManager.cpp - Stream.cpp - ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT} - LINK_LIBS - ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES}) - -install(TARGETS streamexecutor DESTINATION lib) diff --git a/parallel-libs/streamexecutor/lib/Device.cpp b/parallel-libs/streamexecutor/lib/Device.cpp deleted file mode 100644 index 2bed3e7be16..00000000000 --- a/parallel-libs/streamexecutor/lib/Device.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- Device.cpp - Device implementation --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of Device class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Device.h" - -#include <cassert> - -#include "streamexecutor/PlatformDevice.h" -#include "streamexecutor/Stream.h" - -#include "llvm/ADT/STLExtras.h" - -namespace streamexecutor { - -Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {} - -Device::~Device() = default; - -Expected<Stream> Device::createStream() { - Expected<const void *> MaybePlatformStream = PDevice->createStream(); - if (!MaybePlatformStream) - return MaybePlatformStream.takeError(); - return Stream(PDevice, *MaybePlatformStream); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp b/parallel-libs/streamexecutor/lib/DeviceMemory.cpp deleted file mode 100644 index 8447a60b1ca..00000000000 --- a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===-- DeviceMemory.cpp - DeviceMemory implementation --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of DeviceMemory class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/DeviceMemory.h" - -#include "streamexecutor/Device.h" - -namespace streamexecutor { - -GlobalDeviceMemoryBase::~GlobalDeviceMemoryBase() { - if (Handle) - // TODO(jhen): How to handle errors here. - consumeError(TheDevice->freeDeviceMemory(*this)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Error.cpp b/parallel-libs/streamexecutor/lib/Error.cpp deleted file mode 100644 index 0d728fab669..00000000000 --- a/parallel-libs/streamexecutor/lib/Error.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- Error.cpp - Error handling ----------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Types for returning recoverable errors. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Error.h" - -#include "llvm/ADT/StringRef.h" - -namespace { - -// An error with a string message describing the cause. -class StreamExecutorError : public llvm::ErrorInfo<StreamExecutorError> { -public: - StreamExecutorError(llvm::StringRef Message) : Message(Message.str()) {} - - void log(llvm::raw_ostream &OS) const override { OS << Message; } - - std::error_code convertToErrorCode() const override { - llvm_unreachable( - "StreamExecutorError does not support conversion to std::error_code"); - } - - std::string getErrorMessage() const { return Message; } - - static char ID; - -private: - std::string Message; -}; - -char StreamExecutorError::ID = 0; - -} // namespace - -namespace streamexecutor { - -Error make_error(const Twine &Message) { - return llvm::make_error<StreamExecutorError>(Message.str()); -} - -std::string consumeAndGetMessage(Error &&E) { - if (!E) - return "success"; - std::string Message; - llvm::handleAllErrors(std::move(E), - [&Message](const StreamExecutorError &SEE) { - Message = SEE.getErrorMessage(); - }); - return Message; -} - -void dieIfError(Error &&E) { - if (E) { - std::fprintf(stderr, "Error encountered: %s.\n", - streamexecutor::consumeAndGetMessage(std::move(E)).c_str()); - std::exit(EXIT_FAILURE); - } -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/HostMemory.cpp b/parallel-libs/streamexecutor/lib/HostMemory.cpp deleted file mode 100644 index 8eba7e6b563..00000000000 --- a/parallel-libs/streamexecutor/lib/HostMemory.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===-- HostMemory.cpp - HostMemory implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of HostMemory internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/HostMemory.h" -#include "streamexecutor/Device.h" - -namespace streamexecutor { -namespace internal { - -void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer) { - // TODO(jhen): How to handle errors here? - if (Pointer) - consumeError(TheDevice->unregisterHostMemory(Pointer)); -} - -} // namespace internal -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp deleted file mode 100644 index 911ac6656aa..00000000000 --- a/parallel-libs/streamexecutor/lib/Kernel.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- Kernel.cpp - General kernel implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for kernel types. -/// -//===----------------------------------------------------------------------===// - -#include <cassert> - -#include "streamexecutor/Device.h" -#include "streamexecutor/Kernel.h" -#include "streamexecutor/PlatformDevice.h" - -#include "llvm/DebugInfo/Symbolize/Symbolize.h" - -namespace streamexecutor { - -KernelBase::KernelBase(PlatformDevice *D, const void *PlatformKernelHandle, - llvm::StringRef Name) - : PDevice(D), PlatformKernelHandle(PlatformKernelHandle), Name(Name), - DemangledName( - llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr)) { - assert(D != nullptr && - "cannot construct a kernel object with a null platform device"); - assert(PlatformKernelHandle != nullptr && - "cannot construct a kernel object with a null platform kernel handle"); -} - -KernelBase::KernelBase(KernelBase &&Other) noexcept - : PDevice(Other.PDevice), PlatformKernelHandle(Other.PlatformKernelHandle), - Name(std::move(Other.Name)), - DemangledName(std::move(Other.DemangledName)) { - Other.PDevice = nullptr; - Other.PlatformKernelHandle = nullptr; -} - -KernelBase &KernelBase::operator=(KernelBase &&Other) noexcept { - PDevice = Other.PDevice; - PlatformKernelHandle = Other.PlatformKernelHandle; - Name = std::move(Other.Name); - DemangledName = std::move(Other.DemangledName); - Other.PDevice = nullptr; - Other.PlatformKernelHandle = nullptr; - return *this; -} - -KernelBase::~KernelBase() { - if (PlatformKernelHandle) - // TODO(jhen): Handle the error here. - consumeError(PDevice->destroyKernel(PlatformKernelHandle)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp deleted file mode 100644 index 951ea8fc41c..00000000000 --- a/parallel-libs/streamexecutor/lib/KernelSpec.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- KernelSpec.cpp - General kernel spec implementation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for kernel loader specs. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/KernelSpec.h" - -#include "llvm/ADT/STLExtras.h" - -namespace streamexecutor { - -KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName) - : KernelName(KernelName) {} - -CUDAPTXInMemorySpec::CUDAPTXInMemorySpec( - llvm::StringRef KernelName, - const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) - : KernelLoaderSpec(KernelName) { - for (const auto &Spec : SpecList) - PTXByComputeCapability.emplace(Spec.TheComputeCapability, Spec.PTXCode); -} - -const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor, - int ComputeCapabilityMinor) const { - auto Iterator = - PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{ - ComputeCapabilityMajor, ComputeCapabilityMinor}); - if (Iterator == PTXByComputeCapability.begin()) - return nullptr; - --Iterator; - return Iterator->second; -} - -CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName, - const void *Bytes) - : KernelLoaderSpec(KernelName), Bytes(Bytes) {} - -OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName, - const char *Text) - : KernelLoaderSpec(KernelName), Text(Text) {} - -void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) { - if (TheKernelName) - assert(KernelName.equals(*TheKernelName) && - "different kernel names in one MultiKernelLoaderSpec"); - else - TheKernelName = llvm::make_unique<std::string>(KernelName); -} - -MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory( - llvm::StringRef KernelName, - llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) { - assert((TheCUDAPTXInMemorySpec == nullptr) && - "illegal loader spec overwrite"); - setKernelName(KernelName); - TheCUDAPTXInMemorySpec = - llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList); - return *this; -} - -MultiKernelLoaderSpec & -MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName, - const void *Bytes) { - assert((TheCUDAFatbinInMemorySpec == nullptr) && - "illegal loader spec overwrite"); - setKernelName(KernelName); - TheCUDAFatbinInMemorySpec = - llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes); - return *this; -} - -MultiKernelLoaderSpec & -MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName, - const char *OpenCLText) { - assert((TheOpenCLTextInMemorySpec == nullptr) && - "illegal loader spec overwrite"); - setKernelName(KernelName); - TheOpenCLTextInMemorySpec = - llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText); - return *this; -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp b/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp deleted file mode 100644 index 04ac80d74ed..00000000000 --- a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation details for classes from PackedKernelArgumentArray.h. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PackedKernelArgumentArray.h" - -namespace streamexecutor { - -PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Platform.cpp b/parallel-libs/streamexecutor/lib/Platform.cpp deleted file mode 100644 index 4250468a022..00000000000 --- a/parallel-libs/streamexecutor/lib/Platform.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- Platform.cpp - Platform implementation ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of Platform class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Platform.h" - -namespace streamexecutor { - -Platform::~Platform() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp b/parallel-libs/streamexecutor/lib/PlatformDevice.cpp deleted file mode 100644 index 8dd44a3a4aa..00000000000 --- a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- PlatformDevice.cpp - Platform interface implementations -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation file for PlatformDevice.h. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PlatformDevice.h" - -namespace streamexecutor { - -PlatformDevice::~PlatformDevice() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp deleted file mode 100644 index 8f44befbd71..00000000000 --- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===-- PlatformManager.cpp - PlatformManager implementation --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of PlatformManager class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PlatformManager.h" - -#include "streamexecutor/PlatformOptions.h" -#include "streamexecutor/platforms/host/HostPlatform.h" - -#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#endif - -namespace streamexecutor { - -PlatformManager::PlatformManager() { - // TODO(jhen): Register known platforms by name. - // We have a couple of options here: - // * Use build-system flags to set preprocessor macros that select the - // appropriate code to include here. - // * Use static initialization tricks to have platform libraries register - // themselves when they are loaded. - - PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>()); - -#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM - PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>()); -#endif -} - -Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) { - static PlatformManager Instance; - auto Iterator = Instance.PlatformsByName.find(Name.lower()); - if (Iterator != Instance.PlatformsByName.end()) - return Iterator->second.get(); - return make_error("no available platform with name " + Name); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp deleted file mode 100644 index fe135b4d0af..00000000000 --- a/parallel-libs/streamexecutor/lib/Stream.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===-- Stream.cpp - General stream implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for a general stream object. -/// -//===----------------------------------------------------------------------===// - -#include <cassert> - -#include "streamexecutor/Stream.h" - -namespace streamexecutor { - -Stream::Stream(PlatformDevice *D, const void *PlatformStreamHandle) - : PDevice(D), PlatformStreamHandle(PlatformStreamHandle), - ErrorMessageMutex(llvm::make_unique<llvm::sys::RWMutex>()) { - assert(D != nullptr && - "cannot construct a stream object with a null platform device"); - assert(PlatformStreamHandle != nullptr && - "cannot construct a stream object with a null platform stream handle"); -} - -Stream::Stream(Stream &&Other) noexcept - : PDevice(Other.PDevice), PlatformStreamHandle(Other.PlatformStreamHandle), - ErrorMessageMutex(std::move(Other.ErrorMessageMutex)), - ErrorMessage(std::move(Other.ErrorMessage)) { - Other.PDevice = nullptr; - Other.PlatformStreamHandle = nullptr; -} - -Stream &Stream::operator=(Stream &&Other) noexcept { - PDevice = Other.PDevice; - PlatformStreamHandle = Other.PlatformStreamHandle; - ErrorMessageMutex = std::move(Other.ErrorMessageMutex); - ErrorMessage = std::move(Other.ErrorMessage); - Other.PDevice = nullptr; - Other.PlatformStreamHandle = nullptr; - return *this; -} - -Stream::~Stream() { - if (PlatformStreamHandle) - // TODO(jhen): Handle error condition here. - consumeError(PDevice->destroyStream(PlatformStreamHandle)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt deleted file mode 100644 index 0802c059add..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) - add_subdirectory(cuda) -endif() diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt deleted file mode 100644 index 5be76d1c75a..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library( - streamexecutor_cuda_platform - OBJECT - CUDAPlatform.cpp - CUDAPlatformDevice.cpp) diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp deleted file mode 100644 index 9f9e4388647..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDA platform internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#include "streamexecutor/Device.h" -#include "streamexecutor/Platform.h" -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" - -#include "llvm/Support/Mutex.h" - -#include "cuda.h" - -#include <map> - -namespace streamexecutor { -namespace cuda { - -static CUresult ensureCUDAInitialized() { - static CUresult InitResult = []() { return cuInit(0); }(); - return InitResult; -} - -size_t CUDAPlatform::getDeviceCount() const { - if (ensureCUDAInitialized()) - // TODO(jhen): Log an error. - return 0; - - int DeviceCount = 0; - CUresult Result = cuDeviceGetCount(&DeviceCount); - (void)Result; - // TODO(jhen): Log an error. - - return DeviceCount; -} - -Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) { - if (CUresult InitResult = ensureCUDAInitialized()) - return CUresultToError(InitResult, "cached cuInit return value"); - - llvm::sys::ScopedLock Lock(Mutex); - auto Iterator = PlatformDevices.find(DeviceIndex); - if (Iterator == PlatformDevices.end()) { - if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) { - Iterator = - PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first; - } else { - return MaybePDevice.takeError(); - } - } - return Device(&Iterator->second); -} - -} // namespace cuda -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp deleted file mode 100644 index 5284a9a0a35..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDAPlatformDevice. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" -#include "streamexecutor/PlatformDevice.h" - -#include "cuda.h" - -namespace streamexecutor { -namespace cuda { - -static void *offset(const void *Base, size_t Offset) { - return const_cast<char *>(static_cast<const char *>(Base) + Offset); -} - -Error CUresultToError(int CUResult, const llvm::Twine &Message) { - CUresult Result = static_cast<CUresult>(CUResult); - if (Result) { - const char *ErrorName; - if (cuGetErrorName(Result, &ErrorName)) - ErrorName = "UNKNOWN ERROR NAME"; - const char *ErrorString; - if (cuGetErrorString(Result, &ErrorString)) - ErrorString = "UNKNOWN ERROR DESCRIPTION"; - return make_error("CUDA driver error: '" + Message + "', error code = " + - llvm::Twine(static_cast<int>(Result)) + ", name = " + - ErrorName + ", description = '" + ErrorString + "'"); - } else - return Error::success(); -} - -std::string CUDAPlatformDevice::getName() const { - static std::string CachedName = [](int DeviceIndex) { - static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024; - std::string Name = "CUDA device " + std::to_string(DeviceIndex); - char NameFromDriver[MAX_DRIVER_NAME_BYTES]; - if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1, - DeviceIndex)) { - NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0'; - Name.append(": ").append(NameFromDriver); - } - return Name; - }(DeviceIndex); - return CachedName; -} - -Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) { - CUdevice DeviceHandle; - if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex)) - return CUresultToError(Result, "cuDeviceGet"); - - CUcontext ContextHandle; - if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle)) - return CUresultToError(Result, "cuDevicePrimaryCtxRetain"); - - if (CUresult Result = cuCtxSetCurrent(ContextHandle)) - return CUresultToError(Result, "cuCtxSetCurrent"); - - return CUDAPlatformDevice(DeviceIndex); -} - -CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept - : DeviceIndex(Other.DeviceIndex) { - Other.DeviceIndex = -1; -} - -CUDAPlatformDevice &CUDAPlatformDevice:: -operator=(CUDAPlatformDevice &&Other) noexcept { - DeviceIndex = Other.DeviceIndex; - Other.DeviceIndex = -1; - return *this; -} - -CUDAPlatformDevice::~CUDAPlatformDevice() { - CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex); - (void)Result; - // TODO(jhen): Log error. -} - -Expected<const void *> -CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) { - // TODO(jhen): Maybe first check loaded modules? - if (!Spec.hasCUDAPTXInMemory()) - return make_error("no CUDA code available to create kernel"); - - CUdevice Device = static_cast<int>(DeviceIndex); - int ComputeCapabilityMajor = 0; - int ComputeCapabilityMinor = 0; - if (CUresult Result = cuDeviceGetAttribute( - &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - Device)) - return CUresultToError( - Result, - "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"); - if (CUresult Result = cuDeviceGetAttribute( - &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - Device)) - return CUresultToError( - Result, - "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"); - const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor, - ComputeCapabilityMinor); - - if (!Code) - return make_error("no suitable CUDA source found for compute capability " + - llvm::Twine(ComputeCapabilityMajor) + "." + - llvm::Twine(ComputeCapabilityMinor)); - - CUmodule Module; - if (CUresult Result = cuModuleLoadData(&Module, Code)) - return CUresultToError(Result, "cuModuleLoadData"); - - CUfunction Function; - if (CUresult Result = - cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str())) - return CUresultToError(Result, "cuModuleGetFunction"); - - // TODO(jhen): Should I save this function pointer in case someone asks for - // it again? - - // TODO(jhen): Should I save the module pointer so I can unload it when I - // destroy this device? - - return static_cast<const void *>(Function); -} - -Error CUDAPlatformDevice::destroyKernel(const void *Handle) { - // TODO(jhen): Maybe keep track of kernels for each module and unload the - // module after they are all destroyed. - return Error::success(); -} - -Expected<const void *> CUDAPlatformDevice::createStream() { - CUstream Stream; - if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT)) - return CUresultToError(Result, "cuStreamCreate"); - return Stream; -} - -Error CUDAPlatformDevice::destroyStream(const void *Handle) { - return CUresultToError( - cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))), - "cuStreamDestroy"); -} - -Error CUDAPlatformDevice::launch( - const void *PlatformStreamHandle, BlockDimensions BlockSize, - GridDimensions GridSize, const void *PKernelHandle, - const PackedKernelArgumentArrayBase &ArgumentArray) { - CUfunction Function = - reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle)); - CUstream Stream = - reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle)); - - auto Launch = [Function, Stream, BlockSize, - GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) { - return CUresultToError( - cuLaunchKernel(Function, // - GridSize.X, GridSize.Y, GridSize.Z, // - BlockSize.X, BlockSize.Y, BlockSize.Z, // - SharedMemoryBytes, Stream, ArgumentAddresses, nullptr), - "cuLaunchKernel"); - }; - - void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses()); - size_t SharedArgumentCount = ArgumentArray.getSharedCount(); - if (SharedArgumentCount) { - // The argument handling in this case is not very efficient. We may need to - // come back and optimize it later. - // - // Perhaps introduce another branch for the case where there is exactly one - // shared memory argument and it is the first one. This is the only case - // that will be used for compiler-generated CUDA kernels, and OpenCL users - // can choose to take advantage of it by combining their dynamic shared - // memory arguments and putting them first in the kernel signature. - unsigned SharedMemoryBytes = 0; - size_t ArgumentCount = ArgumentArray.getArgumentCount(); - llvm::SmallVector<void *, 16> NonSharedArgumentAddresses( - ArgumentCount - SharedArgumentCount); - size_t NonSharedIndex = 0; - for (size_t I = 0; I < ArgumentCount; ++I) - if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY) - SharedMemoryBytes += ArgumentArray.getSize(I); - else - NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I]; - return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data()); - } - return Launch(0, ArgumentAddresses); -} - -Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle, - const void *DeviceSrcHandle, - size_t SrcByteOffset, void *HostDst, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoHAsync( - offset(HostDst, DstByteOffset), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyDtoHAsync"); -} - -Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle, - const void *HostSrc, size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyHtoDAsync( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - offset(HostSrc, SrcByteOffset), ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyHtoDAsync"); -} - -Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle, - const void *DeviceSrcHandle, - size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoDAsync( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount, - static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), - "cuMemcpyDtoDAsync"); -} - -Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) { - return CUresultToError(cuStreamSynchronize(static_cast<CUstream>( - const_cast<void *>(PlatformStreamHandle))), - "cuStreamSynchronize"); -} - -Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) { - CUdeviceptr Pointer; - if (CUresult Result = cuMemAlloc(&Pointer, ByteCount)) - return CUresultToError(Result, "cuMemAlloc"); - return reinterpret_cast<void *>(Pointer); -} - -Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) { - return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)), - "cuMemFree"); -} - -Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) { - return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u), - "cuMemHostRegister"); -} - -Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) { - return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)), - "cuMemHostUnregister"); -} - -Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle, - size_t SrcByteOffset, - void *HostDst, - size_t DstByteOffset, - size_t ByteCount) { - return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset), - reinterpret_cast<CUdeviceptr>(offset( - DeviceSrcHandle, SrcByteOffset)), - ByteCount), - "cuMemcpyDtoH"); -} - -Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc, - size_t SrcByteOffset, - const void *DeviceDstHandle, - size_t DstByteOffset, - size_t ByteCount) { - return CUresultToError( - cuMemcpyHtoD( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - offset(HostSrc, SrcByteOffset), ByteCount), - "cuMemcpyHtoD"); -} - -Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle, - size_t DstByteOffset, - const void *DeviceSrcHandle, - size_t SrcByteOffset, - size_t ByteCount) { - return CUresultToError( - cuMemcpyDtoD( - reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), - reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), - ByteCount), - "cuMemcpyDtoD"); -} - -} // namespace cuda -} // namespace streamexecutor |

