summary | refs | log | tree | commit | diff | stats
path: root/parallel-libs/streamexecutor/lib
diff options
context:
space:
mode:
Diffstat (limited to 'parallel-libs/streamexecutor/lib')
-rw-r--r--parallel-libs/streamexecutor/lib/CMakeLists.txt25
-rw-r--r--parallel-libs/streamexecutor/lib/Device.cpp37
-rw-r--r--parallel-libs/streamexecutor/lib/DeviceMemory.cpp27
-rw-r--r--parallel-libs/streamexecutor/lib/Error.cpp70
-rw-r--r--parallel-libs/streamexecutor/lib/HostMemory.cpp28
-rw-r--r--parallel-libs/streamexecutor/lib/Kernel.cpp60
-rw-r--r--parallel-libs/streamexecutor/lib/KernelSpec.cpp92
-rw-r--r--parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp21
-rw-r--r--parallel-libs/streamexecutor/lib/Platform.cpp21
-rw-r--r--parallel-libs/streamexecutor/lib/PlatformDevice.cpp21
-rw-r--r--parallel-libs/streamexecutor/lib/PlatformManager.cpp49
-rw-r--r--parallel-libs/streamexecutor/lib/Stream.cpp54
-rw-r--r--parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt3
-rw-r--r--parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt5
-rw-r--r--parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp65
-rw-r--r--parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp307
16 files changed, 0 insertions, 885 deletions
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt
deleted file mode 100644
index 6157654a97a..00000000000
--- a/parallel-libs/streamexecutor/lib/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-# Helper that declares a StreamExecutor library through add_llvm_library and
-# places the target in the "streamexecutor libraries" folder.
-macro(add_se_library name)
- add_llvm_library(${name} ${ARGN})
- set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries")
-endmacro(add_se_library)
-
-# Process the platforms directory before the library target below is declared.
-add_subdirectory(platforms)
-
-# The main streamexecutor library: the core sources plus whatever
-# STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT expands to, linked against
-# STREAM_EXECUTOR_LIBCUDA_LIBRARIES.
-add_se_library(
- streamexecutor
- Device.cpp
- DeviceMemory.cpp
- Error.cpp
- HostMemory.cpp
- Kernel.cpp
- KernelSpec.cpp
- PackedKernelArgumentArray.cpp
- Platform.cpp
- PlatformDevice.cpp
- PlatformManager.cpp
- Stream.cpp
- ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT}
- LINK_LIBS
- ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES})
-
-# Install the built library into the "lib" directory.
-install(TARGETS streamexecutor DESTINATION lib)
diff --git a/parallel-libs/streamexecutor/lib/Device.cpp b/parallel-libs/streamexecutor/lib/Device.cpp
deleted file mode 100644
index 2bed3e7be16..00000000000
--- a/parallel-libs/streamexecutor/lib/Device.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- Device.cpp - Device implementation --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Device class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Device.h"
-
-#include <cassert>
-
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/Stream.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-/// Wraps the given platform-specific device implementation.
-Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
-
-Device::~Device() = default;
-
-/// Asks the platform device for a new stream handle and wraps it in a Stream.
-///
-/// An error reported by the platform device is forwarded to the caller.
-Expected<Stream> Device::createStream() {
-  Expected<const void *> HandleOrError = PDevice->createStream();
-  if (HandleOrError)
-    return Stream(PDevice, *HandleOrError);
-  return HandleOrError.takeError();
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp b/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
deleted file mode 100644
index 8447a60b1ca..00000000000
--- a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- DeviceMemory.cpp - DeviceMemory implementation --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of DeviceMemory class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/DeviceMemory.h"
-
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-
-/// Frees the device allocation through the owning device when this object
-/// holds a non-null handle.
-GlobalDeviceMemoryBase::~GlobalDeviceMemoryBase() {
-  if (!Handle)
-    return;
-  // TODO(jhen): How to handle errors here.
-  // For now any error from the free is consumed and discarded.
-  consumeError(TheDevice->freeDeviceMemory(*this));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Error.cpp b/parallel-libs/streamexecutor/lib/Error.cpp
deleted file mode 100644
index 0d728fab669..00000000000
--- a/parallel-libs/streamexecutor/lib/Error.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- Error.cpp - Error handling ----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Types for returning recoverable errors.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Error.h"
-
-#include "llvm/ADT/StringRef.h"
-
-namespace {
-
-// Error payload that carries nothing but a human-readable message.
-class StreamExecutorError : public llvm::ErrorInfo<StreamExecutorError> {
-public:
-  // Identity object required by llvm::ErrorInfo; defined after the class.
-  static char ID;
-
-  StreamExecutorError(llvm::StringRef Message) : Message(Message.str()) {}
-
-  // Returns a copy of the stored message.
-  std::string getErrorMessage() const { return Message; }
-
-  // Writes the stored message to OS.
-  void log(llvm::raw_ostream &OS) const override { OS << Message; }
-
-  // Conversion to std::error_code is deliberately unsupported.
-  std::error_code convertToErrorCode() const override {
-    llvm_unreachable(
-        "StreamExecutorError does not support conversion to std::error_code");
-  }
-
-private:
-  std::string Message;
-};
-
-char StreamExecutorError::ID = 0;
-
-} // namespace
-
-namespace streamexecutor {
-
-Error make_error(const Twine &Message) {
- return llvm::make_error<StreamExecutorError>(Message.str());
-}
-
-/// Consumes \p E and returns its message as a string.
-///
-/// Returns "success" when \p E holds no error. The handler accepts any error
-/// payload (not only the StreamExecutorError type defined in this file), so
-/// no payload is left unhandled inside handleAllErrors. When \p E holds
-/// several payloads their messages are joined with "; " instead of all but
-/// the last being dropped.
-std::string consumeAndGetMessage(Error &&E) {
-  if (!E)
-    return "success";
-  std::string Message;
-  llvm::handleAllErrors(std::move(E),
-                        [&Message](const llvm::ErrorInfoBase &Info) {
-                          if (!Message.empty())
-                            Message += "; ";
-                          // message() renders the payload through its log().
-                          Message += Info.message();
-                        });
-  return Message;
-}
-
-void dieIfError(Error &&E) {
- if (E) {
- std::fprintf(stderr, "Error encountered: %s.\n",
- streamexecutor::consumeAndGetMessage(std::move(E)).c_str());
- std::exit(EXIT_FAILURE);
- }
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/HostMemory.cpp b/parallel-libs/streamexecutor/lib/HostMemory.cpp
deleted file mode 100644
index 8eba7e6b563..00000000000
--- a/parallel-libs/streamexecutor/lib/HostMemory.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- HostMemory.cpp - HostMemory implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of HostMemory internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/HostMemory.h"
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-namespace internal {
-
-/// Unregisters \p Pointer with \p TheDevice; a null \p Pointer is ignored.
-void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer) {
-  if (!Pointer)
-    return;
-  // TODO(jhen): How to handle errors here?
-  // For now the error, if any, is consumed and discarded.
-  consumeError(TheDevice->unregisterHostMemory(Pointer));
-}
-
-} // namespace internal
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp
deleted file mode 100644
index 911ac6656aa..00000000000
--- a/parallel-libs/streamexecutor/lib/Kernel.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Kernel.cpp - General kernel implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel types.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "llvm/DebugInfo/Symbolize/Symbolize.h"
-
-namespace streamexecutor {
-
-/// Stores the platform device, the platform kernel handle and the kernel
-/// name, and computes the demangled form of the name via LLVMSymbolizer.
-///
-/// Both \p D and \p PlatformKernelHandle must be non-null; this is checked
-/// with assertions only.
-KernelBase::KernelBase(PlatformDevice *D, const void *PlatformKernelHandle,
- llvm::StringRef Name)
- : PDevice(D), PlatformKernelHandle(PlatformKernelHandle), Name(Name),
- DemangledName(
- llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr)) {
- assert(D != nullptr &&
- "cannot construct a kernel object with a null platform device");
- assert(PlatformKernelHandle != nullptr &&
- "cannot construct a kernel object with a null platform kernel handle");
-}
-
-/// Move constructor: takes over the device pointer, kernel handle and both
-/// name strings from \p Other.
-KernelBase::KernelBase(KernelBase &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformKernelHandle(Other.PlatformKernelHandle),
-      Name(std::move(Other.Name)),
-      DemangledName(std::move(Other.DemangledName)) {
-  // Null out the source so that its destructor does not destroy the kernel
-  // that now belongs to this object.
-  Other.PlatformKernelHandle = nullptr;
-  Other.PDevice = nullptr;
-}
-
-/// Move assignment: destroys the kernel currently held by this object (if
-/// any), then takes over the state of \p Other and leaves \p Other empty.
-///
-/// Self-assignment is a no-op.
-KernelBase &KernelBase::operator=(KernelBase &&Other) noexcept {
-  if (this == &Other)
-    return *this;
-
-  // Destroy the handle being overwritten, mirroring the destructor;
-  // otherwise it would never be handed back to the platform device.
-  if (PlatformKernelHandle)
-    // TODO(jhen): Handle the error here.
-    consumeError(PDevice->destroyKernel(PlatformKernelHandle));
-
-  PDevice = Other.PDevice;
-  PlatformKernelHandle = Other.PlatformKernelHandle;
-  Name = std::move(Other.Name);
-  DemangledName = std::move(Other.DemangledName);
-  Other.PDevice = nullptr;
-  Other.PlatformKernelHandle = nullptr;
-  return *this;
-}
-
-/// Hands the kernel handle back to the platform device, unless this object
-/// holds a null handle.
-KernelBase::~KernelBase() {
-  if (!PlatformKernelHandle)
-    return;
-  // TODO(jhen): Handle the error here.
-  consumeError(PDevice->destroyKernel(PlatformKernelHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp
deleted file mode 100644
index 951ea8fc41c..00000000000
--- a/parallel-libs/streamexecutor/lib/KernelSpec.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel loader specs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/KernelSpec.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-/// Records the name of the kernel this spec describes.
-KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
-    : KernelName(KernelName) {}
-
-/// Builds the compute-capability -> PTX lookup table from \p SpecList.
-///
-/// Entries are added with emplace, so when two entries share a compute
-/// capability the first one in \p SpecList is the one that is kept.
-CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
-    llvm::StringRef KernelName,
-    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
-    : KernelLoaderSpec(KernelName) {
-  for (const auto &Entry : SpecList) {
-    PTXByComputeCapability.emplace(Entry.TheComputeCapability, Entry.PTXCode);
-  }
-}
-
-/// Returns the PTX registered for the greatest compute capability that is
-/// not greater than the requested one, or nullptr when every registered
-/// entry is greater than the request (or nothing is registered).
-const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
-                                         int ComputeCapabilityMinor) const {
-  const CUDAPTXInMemorySpec::ComputeCapability Requested{
-      ComputeCapabilityMajor, ComputeCapabilityMinor};
-  // First entry strictly greater than the request; the entry just before it,
-  // if there is one, is the best match.
-  auto FirstGreater = PTXByComputeCapability.upper_bound(Requested);
-  if (FirstGreater != PTXByComputeCapability.begin()) {
-    --FirstGreater;
-    return FirstGreater->second;
-  }
-  return nullptr;
-}
-
-/// Records the kernel name and stores \p Bytes as given.
-CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
- const void *Bytes)
- : KernelLoaderSpec(KernelName), Bytes(Bytes) {}
-
-/// Records the kernel name and stores \p Text as given.
-OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
- const char *Text)
- : KernelLoaderSpec(KernelName), Text(Text) {}
-
-/// Remembers \p KernelName the first time it is called; on later calls only
-/// asserts that the same name is being used.
-void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
-  if (!TheKernelName) {
-    TheKernelName = llvm::make_unique<std::string>(KernelName);
-    return;
-  }
-  assert(KernelName.equals(*TheKernelName) &&
-         "different kernel names in one MultiKernelLoaderSpec");
-}
-
-/// Attaches an in-memory CUDA PTX loader spec. Adding a second one is a
-/// programming error (checked by assertion). Returns *this for chaining.
-MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
-    llvm::StringRef KernelName,
-    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
-  assert((TheCUDAPTXInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  auto Loader = llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
-  TheCUDAPTXInMemorySpec = std::move(Loader);
-  return *this;
-}
-
-/// Attaches an in-memory CUDA fatbin loader spec. Adding a second one is a
-/// programming error (checked by assertion). Returns *this for chaining.
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
-                                             const void *Bytes) {
-  assert((TheCUDAFatbinInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  auto Loader = llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
-  TheCUDAFatbinInMemorySpec = std::move(Loader);
-  return *this;
-}
-
-/// Attaches an in-memory OpenCL text loader spec. Adding a second one is a
-/// programming error (checked by assertion). Returns *this for chaining.
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
-                                             const char *OpenCLText) {
-  assert((TheOpenCLTextInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  auto Loader =
-      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
-  TheOpenCLTextInMemorySpec = std::move(Loader);
-  return *this;
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp b/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
deleted file mode 100644
index 04ac80d74ed..00000000000
--- a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation details for classes from PackedKernelArgumentArray.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PackedKernelArgumentArray.h"
-
-namespace streamexecutor {
-
-// The destructor is defaulted here, in the implementation file, rather than
-// in the class definition.
-PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Platform.cpp b/parallel-libs/streamexecutor/lib/Platform.cpp
deleted file mode 100644
index 4250468a022..00000000000
--- a/parallel-libs/streamexecutor/lib/Platform.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- Platform.cpp - Platform implementation ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Platform class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Platform.h"
-
-namespace streamexecutor {
-
-// The destructor is defaulted here, in the implementation file, rather than
-// in the class definition.
-Platform::~Platform() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp b/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
deleted file mode 100644
index 8dd44a3a4aa..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PlatformDevice.cpp - Platform interface implementations -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation file for PlatformDevice.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-
-// The destructor is defaulted here, in the implementation file, rather than
-// in the class definition.
-PlatformDevice::~PlatformDevice() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp
deleted file mode 100644
index 8f44befbd71..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- PlatformManager.cpp - PlatformManager implementation --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of PlatformManager class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformManager.h"
-
-#include "streamexecutor/PlatformOptions.h"
-#include "streamexecutor/platforms/host/HostPlatform.h"
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#endif
-
-namespace streamexecutor {
-
-/// Registers the platforms known at build time under their lookup names:
-/// "host" always, and "cuda" only when STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-/// is defined.
-PlatformManager::PlatformManager() {
- // TODO(jhen): Register known platforms by name.
- // We have a couple of options here:
- // * Use build-system flags to set preprocessor macros that select the
- // appropriate code to include here.
- // * Use static initialization tricks to have platform libraries register
- // themselves when they are loaded.
-
- PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>());
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
- PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>());
-#endif
-}
-
-/// Looks up a platform by name, ignoring case (the name is lower-cased
-/// before the lookup).
-///
-/// The registry lives in a function-local static PlatformManager that is
-/// constructed on first use. Returns an error when no platform is
-/// registered under the requested name.
-Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) {
-  static PlatformManager Instance;
-  const auto &Registry = Instance.PlatformsByName;
-  auto Found = Registry.find(Name.lower());
-  if (Found == Registry.end())
-    return make_error("no available platform with name " + Name);
-  return Found->second.get();
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp
deleted file mode 100644
index fe135b4d0af..00000000000
--- a/parallel-libs/streamexecutor/lib/Stream.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Stream.cpp - General stream implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for a general stream object.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Stream.h"
-
-namespace streamexecutor {
-
-/// Stores the platform device and platform stream handle and allocates the
-/// mutex that guards the error message.
-///
-/// Both \p D and \p PlatformStreamHandle must be non-null; this is checked
-/// with assertions only.
-Stream::Stream(PlatformDevice *D, const void *PlatformStreamHandle)
- : PDevice(D), PlatformStreamHandle(PlatformStreamHandle),
- ErrorMessageMutex(llvm::make_unique<llvm::sys::RWMutex>()) {
- assert(D != nullptr &&
- "cannot construct a stream object with a null platform device");
- assert(PlatformStreamHandle != nullptr &&
- "cannot construct a stream object with a null platform stream handle");
-}
-
-/// Move constructor: takes over the device pointer, stream handle, mutex and
-/// error message from \p Other.
-Stream::Stream(Stream &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformStreamHandle(Other.PlatformStreamHandle),
-      ErrorMessageMutex(std::move(Other.ErrorMessageMutex)),
-      ErrorMessage(std::move(Other.ErrorMessage)) {
-  // Null out the source so that its destructor does not destroy the stream
-  // that now belongs to this object.
-  Other.PlatformStreamHandle = nullptr;
-  Other.PDevice = nullptr;
-}
-
-/// Move assignment: destroys the platform stream currently held by this
-/// object (if any), then takes over the state of \p Other and leaves
-/// \p Other empty.
-///
-/// Self-assignment is a no-op.
-Stream &Stream::operator=(Stream &&Other) noexcept {
-  if (this == &Other)
-    return *this;
-
-  // Destroy the handle being overwritten, mirroring the destructor;
-  // otherwise the platform stream would be leaked.
-  if (PlatformStreamHandle)
-    // TODO(jhen): Handle error condition here.
-    consumeError(PDevice->destroyStream(PlatformStreamHandle));
-
-  PDevice = Other.PDevice;
-  PlatformStreamHandle = Other.PlatformStreamHandle;
-  ErrorMessageMutex = std::move(Other.ErrorMessageMutex);
-  ErrorMessage = std::move(Other.ErrorMessage);
-  Other.PDevice = nullptr;
-  Other.PlatformStreamHandle = nullptr;
-  return *this;
-}
-
-/// Hands the stream handle back to the platform device, unless this object
-/// holds a null handle.
-Stream::~Stream() {
-  if (!PlatformStreamHandle)
-    return;
-  // TODO(jhen): Handle error condition here.
-  consumeError(PDevice->destroyStream(PlatformStreamHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
deleted file mode 100644
index 0802c059add..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# Descend into the CUDA platform directory only when the CUDA platform is
-# enabled.
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
- add_subdirectory(cuda)
-endif()
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
deleted file mode 100644
index 5be76d1c75a..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Compile the CUDA platform sources as an OBJECT library.
-add_library(
- streamexecutor_cuda_platform
- OBJECT
- CUDAPlatform.cpp
- CUDAPlatformDevice.cpp)
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
deleted file mode 100644
index 9f9e4388647..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDA platform internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Platform.h"
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-
-#include "llvm/Support/Mutex.h"
-
-#include "cuda.h"
-
-#include <map>
-
-namespace streamexecutor {
-namespace cuda {
-
-/// Calls cuInit(0) the first time this function runs and returns that same
-/// cached result on every call.
-static CUresult ensureCUDAInitialized() {
-  static CUresult InitResult = cuInit(0);
-  return InitResult;
-}
-
-/// Returns the number of CUDA devices reported by the driver.
-///
-/// Returns 0 when CUDA initialization failed or when the device-count query
-/// itself fails, so a failed query can never surface as a device count.
-size_t CUDAPlatform::getDeviceCount() const {
-  if (ensureCUDAInitialized())
-    // TODO(jhen): Log an error.
-    return 0;
-
-  int DeviceCount = 0;
-  if (cuDeviceGetCount(&DeviceCount))
-    // TODO(jhen): Log an error.
-    return 0;
-
-  return DeviceCount;
-}
-
-/// Returns a Device backed by the platform device for \p DeviceIndex.
-///
-/// The platform device is created on first request and then kept in
-/// PlatformDevices; the lookup and insertion run under Mutex. Errors from
-/// CUDA initialization or from device creation are returned to the caller.
-Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) {
-  CUresult InitResult = ensureCUDAInitialized();
-  if (InitResult)
-    return CUresultToError(InitResult, "cached cuInit return value");
-
-  llvm::sys::ScopedLock Lock(Mutex);
-  auto Found = PlatformDevices.find(DeviceIndex);
-  if (Found != PlatformDevices.end())
-    return Device(&Found->second);
-
-  auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex);
-  if (!MaybePDevice)
-    return MaybePDevice.takeError();
-  Found = PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first;
-  return Device(&Found->second);
-}
-
-} // namespace cuda
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
deleted file mode 100644
index 5284a9a0a35..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDAPlatformDevice.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "cuda.h"
-
-namespace streamexecutor {
-namespace cuda {
-
-/// Returns \p Base advanced by \p Offset bytes, with constness cast away.
-static void *offset(const void *Base, size_t Offset) {
-  const char *Bytes = static_cast<const char *>(Base);
-  return const_cast<char *>(Bytes + Offset);
-}
-
-Error CUresultToError(int CUResult, const llvm::Twine &Message) {
- CUresult Result = static_cast<CUresult>(CUResult);
- if (Result) {
- const char *ErrorName;
- if (cuGetErrorName(Result, &ErrorName))
- ErrorName = "UNKNOWN ERROR NAME";
- const char *ErrorString;
- if (cuGetErrorString(Result, &ErrorString))
- ErrorString = "UNKNOWN ERROR DESCRIPTION";
- return make_error("CUDA driver error: '" + Message + "', error code = " +
- llvm::Twine(static_cast<int>(Result)) + ", name = " +
- ErrorName + ", description = '" + ErrorString + "'");
- } else
- return Error::success();
-}
-
-/// Returns "CUDA device <index>", followed by ": <driver name>" when the
-/// driver can report a name for this device.
-///
-/// The name is built on every call. A function-local static cache would be
-/// shared by every CUDAPlatformDevice instance, so all devices would report
-/// the name of whichever device was asked first.
-std::string CUDAPlatformDevice::getName() const {
-  static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024;
-  std::string Name = "CUDA device " + std::to_string(DeviceIndex);
-  char NameFromDriver[MAX_DRIVER_NAME_BYTES];
-  if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1,
-                       DeviceIndex)) {
-    // Terminate the buffer explicitly before treating it as a C string.
-    NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0';
-    Name.append(": ").append(NameFromDriver);
-  }
-  return Name;
-}
-
-/// Creates the platform device for \p DeviceIndex: looks up the CUDA device,
-/// retains its primary context and makes that context current.
-///
-/// Returns an error if any of the driver calls fails. If making the context
-/// current fails, the primary context retained just before is released
-/// again so the failed creation does not leave a reference behind.
-Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) {
-  CUdevice DeviceHandle;
-  if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex))
-    return CUresultToError(Result, "cuDeviceGet");
-
-  CUcontext ContextHandle;
-  if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle))
-    return CUresultToError(Result, "cuDevicePrimaryCtxRetain");
-
-  if (CUresult Result = cuCtxSetCurrent(ContextHandle)) {
-    // Best-effort cleanup; the original failure is the one reported.
-    CUresult ReleaseResult = cuDevicePrimaryCtxRelease(DeviceHandle);
-    (void)ReleaseResult;
-    return CUresultToError(Result, "cuCtxSetCurrent");
-  }
-
-  return CUDAPlatformDevice(DeviceIndex);
-}
-
-/// Move constructor: copies the device index from \p Other and sets
-/// \p Other's index to -1.
-CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept
- : DeviceIndex(Other.DeviceIndex) {
- Other.DeviceIndex = -1;
-}
-
-/// Move assignment: releases the primary context reference this object
-/// currently holds (if any), then takes over \p Other's device index and
-/// marks \p Other as moved-from by setting its index to -1.
-///
-/// Self-assignment is a no-op.
-CUDAPlatformDevice &CUDAPlatformDevice::
-operator=(CUDAPlatformDevice &&Other) noexcept {
-  if (this == &Other)
-    return *this;
-
-  // An index of -1 marks a moved-from object that holds no reference.
-  if (DeviceIndex != static_cast<decltype(DeviceIndex)>(-1)) {
-    CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex);
-    (void)Result;
-    // TODO(jhen): Log error.
-  }
-
-  DeviceIndex = Other.DeviceIndex;
-  Other.DeviceIndex = -1;
-  return *this;
-}
-
-/// Releases this object's reference on the device's primary context.
-///
-/// The move operations set DeviceIndex to -1 in the object they move from;
-/// such an object holds no reference, so no driver call is made for it.
-CUDAPlatformDevice::~CUDAPlatformDevice() {
-  if (DeviceIndex == static_cast<decltype(DeviceIndex)>(-1))
-    return;
-  CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex);
-  (void)Result;
-  // TODO(jhen): Log error.
-}
-
-/// Loads the PTX from \p Spec selected for this device's compute capability
-/// and returns a handle to the kernel function named by \p Spec.
-///
-/// Returns an error if \p Spec carries no in-memory PTX, if no PTX entry is
-/// selected for the device's compute capability, or if a CUDA driver call
-/// fails.
-Expected<const void *>
-CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) {
-  // TODO(jhen): Maybe first check loaded modules?
-  if (!Spec.hasCUDAPTXInMemory())
-    return make_error("no CUDA code available to create kernel");
-
-  CUdevice Device = static_cast<int>(DeviceIndex);
-  int ComputeCapabilityMajor = 0;
-  int ComputeCapabilityMinor = 0;
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR");
-  // The minor version must be read with the MINOR attribute; reading the
-  // MAJOR attribute twice would make a 3.5 device look like 3.3.
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR");
-  const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor,
-                                                       ComputeCapabilityMinor);
-
-  if (!Code)
-    return make_error("no suitable CUDA source found for compute capability " +
-                      llvm::Twine(ComputeCapabilityMajor) + "." +
-                      llvm::Twine(ComputeCapabilityMinor));
-
-  CUmodule Module;
-  if (CUresult Result = cuModuleLoadData(&Module, Code))
-    return CUresultToError(Result, "cuModuleLoadData");
-
-  CUfunction Function;
-  if (CUresult Result = cuModuleGetFunction(&Function, Module,
-                                            Spec.getKernelName().c_str())) {
-    // Nothing will ever refer to this module, so unload it (best effort)
-    // instead of leaking it; the lookup failure is the error reported.
-    CUresult UnloadResult = cuModuleUnload(Module);
-    (void)UnloadResult;
-    return CUresultToError(Result, "cuModuleGetFunction");
-  }
-
-  // TODO(jhen): Should I save this function pointer in case someone asks for
-  // it again?
-
-  // TODO(jhen): Should I save the module pointer so I can unload it when I
-  // destroy this device?
-
-  return static_cast<const void *>(Function);
-}
-
-/// Currently does nothing with \p Handle and always returns success.
-Error CUDAPlatformDevice::destroyKernel(const void *Handle) {
- // TODO(jhen): Maybe keep track of kernels for each module and unload the
- // module after they are all destroyed.
- return Error::success();
-}
-
-/// Creates a CUDA stream with default flags and returns it as an opaque
-/// handle, or returns the driver error.
-Expected<const void *> CUDAPlatformDevice::createStream() {
-  CUstream NewStream;
-  CUresult Result = cuStreamCreate(&NewStream, CU_STREAM_DEFAULT);
-  if (Result)
-    return CUresultToError(Result, "cuStreamCreate");
-  return NewStream;
-}
-
-/// Destroys the CUDA stream identified by \p Handle.
-Error CUDAPlatformDevice::destroyStream(const void *Handle) {
-  CUstream Stream = static_cast<CUstream>(const_cast<void *>(Handle));
-  return CUresultToError(cuStreamDestroy(Stream), "cuStreamDestroy");
-}
-
-/// Enqueues a kernel launch on the given stream.
-///
-/// Arguments of type SHARED_DEVICE_MEMORY are not passed to the kernel as
-/// parameters; their sizes are summed and passed to cuLaunchKernel as the
-/// dynamic shared memory size. All other argument addresses are passed on in
-/// their original order.
-Error CUDAPlatformDevice::launch(
-    const void *PlatformStreamHandle, BlockDimensions BlockSize,
-    GridDimensions GridSize, const void *PKernelHandle,
-    const PackedKernelArgumentArrayBase &ArgumentArray) {
-  CUfunction Kernel =
-      reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle));
-  CUstream Queue =
-      reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-
-  // Issues the driver call once the shared memory size and the parameter
-  // list have been settled.
-  auto Submit = [&](size_t DynamicSharedBytes, void **KernelParams) {
-    return CUresultToError(
-        cuLaunchKernel(Kernel,                                //
-                       GridSize.X, GridSize.Y, GridSize.Z,    //
-                       BlockSize.X, BlockSize.Y, BlockSize.Z, //
-                       DynamicSharedBytes, Queue, KernelParams, nullptr),
-        "cuLaunchKernel");
-  };
-
-  void **AllAddresses = const_cast<void **>(ArgumentArray.getAddresses());
-  const size_t SharedCount = ArgumentArray.getSharedCount();
-  if (SharedCount == 0)
-    return Submit(0, AllAddresses);
-
-  // The argument handling in this case is not very efficient. We may need to
-  // come back and optimize it later.
-  //
-  // Perhaps introduce another branch for the case where there is exactly one
-  // shared memory argument and it is the first one. This is the only case
-  // that will be used for compiler-generated CUDA kernels, and OpenCL users
-  // can choose to take advantage of it by combining their dynamic shared
-  // memory arguments and putting them first in the kernel signature.
-  unsigned DynamicSharedBytes = 0;
-  const size_t TotalCount = ArgumentArray.getArgumentCount();
-  llvm::SmallVector<void *, 16> KernelParams(TotalCount - SharedCount);
-  size_t NextParam = 0;
-  for (size_t Index = 0; Index < TotalCount; ++Index) {
-    if (ArgumentArray.getType(Index) !=
-        KernelArgumentType::SHARED_DEVICE_MEMORY) {
-      KernelParams[NextParam++] = AllAddresses[Index];
-      continue;
-    }
-    DynamicSharedBytes += ArgumentArray.getSize(Index);
-  }
-  return Submit(DynamicSharedBytes, KernelParams.data());
-}
-
-/// Enqueues an asynchronous device-to-host copy of \p ByteCount bytes on the
-/// given stream, applying the byte offsets to source and destination.
-Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset, void *HostDst,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  void *Destination = offset(HostDst, DstByteOffset);
-  CUdeviceptr Source =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset));
-  CUstream Stream =
-      static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-  return CUresultToError(
-      cuMemcpyDtoHAsync(Destination, Source, ByteCount, Stream),
-      "cuMemcpyDtoHAsync");
-}
-
-/// Enqueues an asynchronous host-to-device copy of \p ByteCount bytes on the
-/// given stream, applying the byte offsets to source and destination.
-Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle,
-                                  const void *HostSrc, size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  CUdeviceptr Destination =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset));
-  void *Source = offset(HostSrc, SrcByteOffset);
-  CUstream Stream =
-      static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-  return CUresultToError(
-      cuMemcpyHtoDAsync(Destination, Source, ByteCount, Stream),
-      "cuMemcpyHtoDAsync");
-}
-
-/// Enqueues an asynchronous device-to-device copy of \p ByteCount bytes on
-/// the given stream, applying the byte offsets to source and destination.
-Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  CUdeviceptr Destination =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset));
-  CUdeviceptr Source =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset));
-  CUstream Stream =
-      static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-  return CUresultToError(
-      cuMemcpyDtoDAsync(Destination, Source, ByteCount, Stream),
-      "cuMemcpyDtoDAsync");
-}
-
-/// Calls cuStreamSynchronize on the given stream and converts the result to
-/// an Error.
-Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) {
-  CUstream Stream =
-      static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-  return CUresultToError(cuStreamSynchronize(Stream), "cuStreamSynchronize");
-}
-
-/// Allocates \p ByteCount bytes with cuMemAlloc and returns the device
-/// pointer as an opaque handle, or returns the driver error.
-Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) {
-  CUdeviceptr Pointer;
-  CUresult Result = cuMemAlloc(&Pointer, ByteCount);
-  if (Result)
-    return CUresultToError(Result, "cuMemAlloc");
-  return reinterpret_cast<void *>(Pointer);
-}
-
-/// Frees the device allocation identified by \p Handle.
-Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) {
-  CUdeviceptr Pointer = reinterpret_cast<CUdeviceptr>(Handle);
-  return CUresultToError(cuMemFree(Pointer), "cuMemFree");
-}
-
-/// Registers \p ByteCount bytes of host memory at \p Memory with the driver,
-/// passing 0 as the flags argument.
-Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) {
-  CUresult Result = cuMemHostRegister(Memory, ByteCount, 0u);
-  return CUresultToError(Result, "cuMemHostRegister");
-}
-
-/// Unregisters the host memory at \p Memory with the driver.
-Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) {
-  CUresult Result = cuMemHostUnregister(const_cast<void *>(Memory));
-  return CUresultToError(Result, "cuMemHostUnregister");
-}
-
-/// Copies \p ByteCount bytes from device to host with cuMemcpyDtoH, applying
-/// the byte offsets to source and destination.
-Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             void *HostDst,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  void *Destination = offset(HostDst, DstByteOffset);
-  CUdeviceptr Source =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset));
-  return CUresultToError(cuMemcpyDtoH(Destination, Source, ByteCount),
-                         "cuMemcpyDtoH");
-}
-
-/// Copies \p ByteCount bytes from host to device with cuMemcpyHtoD, applying
-/// the byte offsets to source and destination.
-Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc,
-                                             size_t SrcByteOffset,
-                                             const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  CUdeviceptr Destination =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset));
-  void *Source = offset(HostSrc, SrcByteOffset);
-  return CUresultToError(cuMemcpyHtoD(Destination, Source, ByteCount),
-                         "cuMemcpyHtoD");
-}
-
-/// Copies \p ByteCount bytes from device to device with cuMemcpyDtoD,
-/// applying the byte offsets to source and destination.
-Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             size_t ByteCount) {
-  CUdeviceptr Destination =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset));
-  CUdeviceptr Source =
-      reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset));
-  return CUresultToError(cuMemcpyDtoD(Destination, Source, ByteCount),
-                         "cuMemcpyDtoD");
-}
-
-} // namespace cuda
-} // namespace streamexecutor
OpenPOWER on IntegriCloud