16 files changed, 0 insertions, 885 deletions
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt
deleted file mode 100644
index 6157654a97a..00000000000
--- a/parallel-libs/streamexecutor/lib/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-macro(add_se_library name)
-  add_llvm_library(${name} ${ARGN})
-  set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries")
-endmacro(add_se_library)
-
-add_subdirectory(platforms)
-
-add_se_library(
-    streamexecutor
-    Device.cpp
-    DeviceMemory.cpp
-    Error.cpp
-    HostMemory.cpp
-    Kernel.cpp
-    KernelSpec.cpp
-    PackedKernelArgumentArray.cpp
-    Platform.cpp
-    PlatformDevice.cpp
-    PlatformManager.cpp
-    Stream.cpp
-    ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT}
-    LINK_LIBS
-    ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES})
-
-install(TARGETS streamexecutor DESTINATION lib)
diff --git a/parallel-libs/streamexecutor/lib/Device.cpp b/parallel-libs/streamexecutor/lib/Device.cpp
deleted file mode 100644
index 2bed3e7be16..00000000000
--- a/parallel-libs/streamexecutor/lib/Device.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- Device.cpp - Device implementation --------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Device class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Device.h"
-
-#include <cassert>
-
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/Stream.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
-
-Device::~Device() = default;
-
-Expected<Stream> Device::createStream() {
-  Expected<const void *> MaybePlatformStream = PDevice->createStream();
-  if (!MaybePlatformStream)
-    return MaybePlatformStream.takeError();
-  return Stream(PDevice, *MaybePlatformStream);
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp b/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
deleted file mode 100644
index 8447a60b1ca..00000000000
--- a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- DeviceMemory.cpp - DeviceMemory implementation --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of DeviceMemory class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/DeviceMemory.h"
-
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-
-GlobalDeviceMemoryBase::~GlobalDeviceMemoryBase() {
-  if (Handle)
-    // TODO(jhen): How to handle errors here.
-    consumeError(TheDevice->freeDeviceMemory(*this));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Error.cpp b/parallel-libs/streamexecutor/lib/Error.cpp
deleted file mode 100644
index 0d728fab669..00000000000
--- a/parallel-libs/streamexecutor/lib/Error.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- Error.cpp - Error handling ----------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Types for returning recoverable errors.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Error.h"
-
-#include "llvm/ADT/StringRef.h"
-
-namespace {
-
-// An error with a string message describing the cause.
-class StreamExecutorError : public llvm::ErrorInfo<StreamExecutorError> {
-public:
-  StreamExecutorError(llvm::StringRef Message) : Message(Message.str()) {}
-
-  void log(llvm::raw_ostream &OS) const override { OS << Message; }
-
-  std::error_code convertToErrorCode() const override {
-    llvm_unreachable(
-        "StreamExecutorError does not support conversion to std::error_code");
-  }
-
-  std::string getErrorMessage() const { return Message; }
-
-  static char ID;
-
-private:
-  std::string Message;
-};
-
-char StreamExecutorError::ID = 0;
-
-} // namespace
-
-namespace streamexecutor {
-
-Error make_error(const Twine &Message) {
-  return llvm::make_error<StreamExecutorError>(Message.str());
-}
-
-std::string consumeAndGetMessage(Error &&E) {
-  if (!E)
-    return "success";
-  std::string Message;
-  llvm::handleAllErrors(std::move(E),
-                        [&Message](const StreamExecutorError &SEE) {
-                          Message = SEE.getErrorMessage();
-                        });
-  return Message;
-}
-
-void dieIfError(Error &&E) {
-  if (E) {
-    std::fprintf(stderr, "Error encountered: %s.\n",
-                 streamexecutor::consumeAndGetMessage(std::move(E)).c_str());
-    std::exit(EXIT_FAILURE);
-  }
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/HostMemory.cpp b/parallel-libs/streamexecutor/lib/HostMemory.cpp
deleted file mode 100644
index 8eba7e6b563..00000000000
--- a/parallel-libs/streamexecutor/lib/HostMemory.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- HostMemory.cpp - HostMemory implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of HostMemory internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/HostMemory.h"
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-namespace internal {
-
-void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer) {
-  // TODO(jhen): How to handle errors here?
-  if (Pointer)
-    consumeError(TheDevice->unregisterHostMemory(Pointer));
-}
-
-} // namespace internal
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp
deleted file mode 100644
index 911ac6656aa..00000000000
--- a/parallel-libs/streamexecutor/lib/Kernel.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Kernel.cpp - General kernel implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel types.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "llvm/DebugInfo/Symbolize/Symbolize.h"
-
-namespace streamexecutor {
-
-KernelBase::KernelBase(PlatformDevice *D, const void *PlatformKernelHandle,
-                       llvm::StringRef Name)
-    : PDevice(D), PlatformKernelHandle(PlatformKernelHandle), Name(Name),
-      DemangledName(
-          llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr)) {
-  assert(D != nullptr &&
-         "cannot construct a kernel object with a null platform device");
-  assert(PlatformKernelHandle != nullptr &&
-         "cannot construct a kernel object with a null platform kernel handle");
-}
-
-KernelBase::KernelBase(KernelBase &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformKernelHandle(Other.PlatformKernelHandle),
-      Name(std::move(Other.Name)),
-      DemangledName(std::move(Other.DemangledName)) {
-  Other.PDevice = nullptr;
-  Other.PlatformKernelHandle = nullptr;
-}
-
-KernelBase &KernelBase::operator=(KernelBase &&Other) noexcept {
-  PDevice = Other.PDevice;
-  PlatformKernelHandle = Other.PlatformKernelHandle;
-  Name = std::move(Other.Name);
-  DemangledName = std::move(Other.DemangledName);
-  Other.PDevice = nullptr;
-  Other.PlatformKernelHandle = nullptr;
-  return *this;
-}
-
-KernelBase::~KernelBase() {
-  if (PlatformKernelHandle)
-    // TODO(jhen): Handle the error here.
-    consumeError(PDevice->destroyKernel(PlatformKernelHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp
deleted file mode 100644
index 951ea8fc41c..00000000000
--- a/parallel-libs/streamexecutor/lib/KernelSpec.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel loader specs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/KernelSpec.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
-    : KernelName(KernelName) {}
-
-CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
-    llvm::StringRef KernelName,
-    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
-    : KernelLoaderSpec(KernelName) {
-  for (const auto &Spec : SpecList)
-    PTXByComputeCapability.emplace(Spec.TheComputeCapability, Spec.PTXCode);
-}
-
-const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
-                                         int ComputeCapabilityMinor) const {
-  auto Iterator =
-      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
-          ComputeCapabilityMajor, ComputeCapabilityMinor});
-  if (Iterator == PTXByComputeCapability.begin())
-    return nullptr;
-  --Iterator;
-  return Iterator->second;
-}
-
-CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
-                                               const void *Bytes)
-    : KernelLoaderSpec(KernelName), Bytes(Bytes) {}
-
-OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
-                                               const char *Text)
-    : KernelLoaderSpec(KernelName), Text(Text) {}
-
-void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
-  if (TheKernelName)
-    assert(KernelName.equals(*TheKernelName) &&
-           "different kernel names in one MultiKernelLoaderSpec");
-  else
-    TheKernelName = llvm::make_unique<std::string>(KernelName);
-}
-
-MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
-    llvm::StringRef KernelName,
-    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
-  assert((TheCUDAPTXInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheCUDAPTXInMemorySpec =
-      llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
-  return *this;
-}
-
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
-                                             const void *Bytes) {
-  assert((TheCUDAFatbinInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheCUDAFatbinInMemorySpec =
-      llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
-  return *this;
-}
-
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
-                                             const char *OpenCLText) {
-  assert((TheOpenCLTextInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheOpenCLTextInMemorySpec =
-      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
-  return *this;
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp b/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
deleted file mode 100644
index 04ac80d74ed..00000000000
--- a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation details for classes from PackedKernelArgumentArray.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PackedKernelArgumentArray.h"
-
-namespace streamexecutor {
-
-PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Platform.cpp b/parallel-libs/streamexecutor/lib/Platform.cpp
deleted file mode 100644
index 4250468a022..00000000000
--- a/parallel-libs/streamexecutor/lib/Platform.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- Platform.cpp - Platform implementation ----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Platform class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Platform.h"
-
-namespace streamexecutor {
-
-Platform::~Platform() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp b/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
deleted file mode 100644
index 8dd44a3a4aa..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PlatformDevice.cpp - Platform interface implementations -----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation file for PlatformDevice.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-
-PlatformDevice::~PlatformDevice() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp
deleted file mode 100644
index 8f44befbd71..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- PlatformManager.cpp - PlatformManager implementation --------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of PlatformManager class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformManager.h"
-
-#include "streamexecutor/PlatformOptions.h"
-#include "streamexecutor/platforms/host/HostPlatform.h"
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#endif
-
-namespace streamexecutor {
-
-PlatformManager::PlatformManager() {
-  // TODO(jhen): Register known platforms by name.
-  // We have a couple of options here:
-  //  * Use build-system flags to set preprocessor macros that select the
-  //    appropriate code to include here.
-  //  * Use static initialization tricks to have platform libraries register
-  //    themselves when they are loaded.
-
-  PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>());
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-  PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>());
-#endif
-}
-
-Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) {
-  static PlatformManager Instance;
-  auto Iterator = Instance.PlatformsByName.find(Name.lower());
-  if (Iterator != Instance.PlatformsByName.end())
-    return Iterator->second.get();
-  return make_error("no available platform with name " + Name);
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp
deleted file mode 100644
index fe135b4d0af..00000000000
--- a/parallel-libs/streamexecutor/lib/Stream.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Stream.cpp - General stream implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for a general stream object.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Stream.h"
-
-namespace streamexecutor {
-
-Stream::Stream(PlatformDevice *D, const void *PlatformStreamHandle)
-    : PDevice(D), PlatformStreamHandle(PlatformStreamHandle),
-      ErrorMessageMutex(llvm::make_unique<llvm::sys::RWMutex>()) {
-  assert(D != nullptr &&
-         "cannot construct a stream object with a null platform device");
-  assert(PlatformStreamHandle != nullptr &&
-         "cannot construct a stream object with a null platform stream handle");
-}
-
-Stream::Stream(Stream &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformStreamHandle(Other.PlatformStreamHandle),
-      ErrorMessageMutex(std::move(Other.ErrorMessageMutex)),
-      ErrorMessage(std::move(Other.ErrorMessage)) {
-  Other.PDevice = nullptr;
-  Other.PlatformStreamHandle = nullptr;
-}
-
-Stream &Stream::operator=(Stream &&Other) noexcept {
-  PDevice = Other.PDevice;
-  PlatformStreamHandle = Other.PlatformStreamHandle;
-  ErrorMessageMutex = std::move(Other.ErrorMessageMutex);
-  ErrorMessage = std::move(Other.ErrorMessage);
-  Other.PDevice = nullptr;
-  Other.PlatformStreamHandle = nullptr;
-  return *this;
-}
-
-Stream::~Stream() {
-  if (PlatformStreamHandle)
-    // TODO(jhen): Handle error condition here.
-    consumeError(PDevice->destroyStream(PlatformStreamHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
deleted file mode 100644
index 0802c059add..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
-    add_subdirectory(cuda)
-endif()
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
deleted file mode 100644
index 5be76d1c75a..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-add_library(
-    streamexecutor_cuda_platform
-    OBJECT
-    CUDAPlatform.cpp
-    CUDAPlatformDevice.cpp)
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
deleted file mode 100644
index 9f9e4388647..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDA platform internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Platform.h"
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-
-#include "llvm/Support/Mutex.h"
-
-#include "cuda.h"
-
-#include <map>
-
-namespace streamexecutor {
-namespace cuda {
-
-static CUresult ensureCUDAInitialized() {
-  static CUresult InitResult = []() { return cuInit(0); }();
-  return InitResult;
-}
-
-size_t CUDAPlatform::getDeviceCount() const {
-  if (ensureCUDAInitialized())
-    // TODO(jhen): Log an error.
-    return 0;
-
-  int DeviceCount = 0;
-  CUresult Result = cuDeviceGetCount(&DeviceCount);
-  (void)Result;
-  // TODO(jhen): Log an error.
-
-  return DeviceCount;
-}
-
-Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) {
-  if (CUresult InitResult = ensureCUDAInitialized())
-    return CUresultToError(InitResult, "cached cuInit return value");
-
-  llvm::sys::ScopedLock Lock(Mutex);
-  auto Iterator = PlatformDevices.find(DeviceIndex);
-  if (Iterator == PlatformDevices.end()) {
-    if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) {
-      Iterator =
-          PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first;
-    } else {
-      return MaybePDevice.takeError();
-    }
-  }
-  return Device(&Iterator->second);
-}
-
-} // namespace cuda
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
deleted file mode 100644
index 5284a9a0a35..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDAPlatformDevice.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "cuda.h"
-
-namespace streamexecutor {
-namespace cuda {
-
-static void *offset(const void *Base, size_t Offset) {
-  return const_cast<char *>(static_cast<const char *>(Base) + Offset);
-}
-
-Error CUresultToError(int CUResult, const llvm::Twine &Message) {
-  CUresult Result = static_cast<CUresult>(CUResult);
-  if (Result) {
-    const char *ErrorName;
-    if (cuGetErrorName(Result, &ErrorName))
-      ErrorName = "UNKNOWN ERROR NAME";
-    const char *ErrorString;
-    if (cuGetErrorString(Result, &ErrorString))
-      ErrorString = "UNKNOWN ERROR DESCRIPTION";
-    return make_error("CUDA driver error: '" + Message + "', error code = " +
-                      llvm::Twine(static_cast<int>(Result)) + ", name = " +
-                      ErrorName + ", description = '" + ErrorString + "'");
-  } else
-    return Error::success();
-}
-
-std::string CUDAPlatformDevice::getName() const {
-  static std::string CachedName = [](int DeviceIndex) {
-    static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024;
-    std::string Name = "CUDA device " + std::to_string(DeviceIndex);
-    char NameFromDriver[MAX_DRIVER_NAME_BYTES];
-    if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1,
-                         DeviceIndex)) {
-      NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0';
-      Name.append(": ").append(NameFromDriver);
-    }
-    return Name;
-  }(DeviceIndex);
-  return CachedName;
-}
-
-Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) {
-  CUdevice DeviceHandle;
-  if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex))
-    return CUresultToError(Result, "cuDeviceGet");
-
-  CUcontext ContextHandle;
-  if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle))
-    return CUresultToError(Result, "cuDevicePrimaryCtxRetain");
-
-  if (CUresult Result = cuCtxSetCurrent(ContextHandle))
-    return CUresultToError(Result, "cuCtxSetCurrent");
-
-  return CUDAPlatformDevice(DeviceIndex);
-}
-
-CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept
-    : DeviceIndex(Other.DeviceIndex) {
-  Other.DeviceIndex = -1;
-}
-
-CUDAPlatformDevice &CUDAPlatformDevice::
-operator=(CUDAPlatformDevice &&Other) noexcept {
-  DeviceIndex = Other.DeviceIndex;
-  Other.DeviceIndex = -1;
-  return *this;
-}
-
-CUDAPlatformDevice::~CUDAPlatformDevice() {
-  CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex);
-  (void)Result;
-  // TODO(jhen): Log error.
-}
-
-Expected<const void *>
-CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) {
-  // TODO(jhen): Maybe first check loaded modules?
-  if (!Spec.hasCUDAPTXInMemory())
-    return make_error("no CUDA code available to create kernel");
-
-  CUdevice Device = static_cast<int>(DeviceIndex);
-  int ComputeCapabilityMajor = 0;
-  int ComputeCapabilityMinor = 0;
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR");
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR");
-  const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor,
-                                                       ComputeCapabilityMinor);
-
-  if (!Code)
-    return make_error("no suitable CUDA source found for compute capability " +
-                      llvm::Twine(ComputeCapabilityMajor) + "." +
-                      llvm::Twine(ComputeCapabilityMinor));
-
-  CUmodule Module;
-  if (CUresult Result = cuModuleLoadData(&Module, Code))
-    return CUresultToError(Result, "cuModuleLoadData");
-
-  CUfunction Function;
-  if (CUresult Result =
-          cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str()))
-    return CUresultToError(Result, "cuModuleGetFunction");
-
-  // TODO(jhen): Should I save this function pointer in case someone asks for
-  // it again?
-
-  // TODO(jhen): Should I save the module pointer so I can unload it when I
-  // destroy this device?
-
-  return static_cast<const void *>(Function);
-}
-
-Error CUDAPlatformDevice::destroyKernel(const void *Handle) {
-  // TODO(jhen): Maybe keep track of kernels for each module and unload the
-  // module after they are all destroyed.
-  return Error::success();
-}
-
-Expected<const void *> CUDAPlatformDevice::createStream() {
-  CUstream Stream;
-  if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT))
-    return CUresultToError(Result, "cuStreamCreate");
-  return Stream;
-}
-
-Error CUDAPlatformDevice::destroyStream(const void *Handle) {
-  return CUresultToError(
-      cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))),
-      "cuStreamDestroy");
-}
-
-Error CUDAPlatformDevice::launch(
-    const void *PlatformStreamHandle, BlockDimensions BlockSize,
-    GridDimensions GridSize, const void *PKernelHandle,
-    const PackedKernelArgumentArrayBase &ArgumentArray) {
-  CUfunction Function =
-      reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle));
-  CUstream Stream =
-      reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-
-  auto Launch = [Function, Stream, BlockSize,
-                 GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) {
-    return CUresultToError(
-        cuLaunchKernel(Function,                              //
-                       GridSize.X, GridSize.Y, GridSize.Z,    //
-                       BlockSize.X, BlockSize.Y, BlockSize.Z, //
-                       SharedMemoryBytes, Stream, ArgumentAddresses, nullptr),
-        "cuLaunchKernel");
-  };
-
-  void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses());
-  size_t SharedArgumentCount = ArgumentArray.getSharedCount();
-  if (SharedArgumentCount) {
-    // The argument handling in this case is not very efficient. We may need to
-    // come back and optimize it later.
-    //
-    // Perhaps introduce another branch for the case where there is exactly one
-    // shared memory argument and it is the first one. This is the only case
-    // that will be used for compiler-generated CUDA kernels, and OpenCL users
-    // can choose to take advantage of it by combining their dynamic shared
-    // memory arguments and putting them first in the kernel signature.
-    unsigned SharedMemoryBytes = 0;
-    size_t ArgumentCount = ArgumentArray.getArgumentCount();
-    llvm::SmallVector<void *, 16> NonSharedArgumentAddresses(
-        ArgumentCount - SharedArgumentCount);
-    size_t NonSharedIndex = 0;
-    for (size_t I = 0; I < ArgumentCount; ++I)
-      if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY)
-        SharedMemoryBytes += ArgumentArray.getSize(I);
-      else
-        NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I];
-    return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data());
-  }
-  return Launch(0, ArgumentAddresses);
-}
-
-Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset, void *HostDst,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoHAsync(
-          offset(HostDst, DstByteOffset),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyDtoHAsync");
-}
-
-Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle,
-                                  const void *HostSrc, size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyHtoDAsync(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          offset(HostSrc, SrcByteOffset), ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyHtoDAsync");
-}
-
-Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoDAsync(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyDtoDAsync");
-}
-
-Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) {
-  return CUresultToError(cuStreamSynchronize(static_cast<CUstream>(
-                             const_cast<void *>(PlatformStreamHandle))),
-                         "cuStreamSynchronize");
-}
-
-Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) {
-  CUdeviceptr Pointer;
-  if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
-    return CUresultToError(Result, "cuMemAlloc");
-  return reinterpret_cast<void *>(Pointer);
-}
-
-Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) {
-  return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)),
-                         "cuMemFree");
-}
-
-Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) {
-  return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u),
-                         "cuMemHostRegister");
-}
-
-Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) {
-  return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)),
-                         "cuMemHostUnregister");
-}
-
-Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             void *HostDst,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset),
-                                      reinterpret_cast<CUdeviceptr>(offset(
-                                          DeviceSrcHandle, SrcByteOffset)),
-                                      ByteCount),
-                         "cuMemcpyDtoH");
-}
-
-Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc,
-                                             size_t SrcByteOffset,
-                                             const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyHtoD(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          offset(HostSrc, SrcByteOffset), ByteCount),
-      "cuMemcpyHtoD");
-}
-
-Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoD(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount),
-      "cuMemcpyDtoD");
-}
-
-} // namespace cuda
-} // namespace streamexecutor