diff options
| author | Jason Henline <jhen@google.com> | 2016-10-25 20:38:08 +0000 | 
|---|---|---|
| committer | Jason Henline <jhen@google.com> | 2016-10-25 20:38:08 +0000 | 
| commit | b3f709e10f37225ae65c1d48c4623f6abc2cac1e (patch) | |
| tree | 935df072bbc2ee7873c138dd75f04a32d96c2249 /parallel-libs/streamexecutor/lib/platforms | |
| parent | 209a77d8d9247b7612025a15f0b4c18bc49e66eb (diff) | |
| download | bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.tar.gz bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.zip | |
[SE] Remove StreamExecutor
Summary:
The project has been renamed to Acxxel, so this old directory needs to
be deleted.
Reviewers: jlebar, jprice
Subscribers: beanz, mgorny, parallel_libs-commits, modocache
Differential Revision: https://reviews.llvm.org/D25964
llvm-svn: 285115
Diffstat (limited to 'parallel-libs/streamexecutor/lib/platforms')
4 files changed, 0 insertions, 380 deletions
| diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt deleted file mode 100644 index 0802c059add..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) -    add_subdirectory(cuda) -endif() diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt deleted file mode 100644 index 5be76d1c75a..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library( -    streamexecutor_cuda_platform -    OBJECT -    CUDAPlatform.cpp -    CUDAPlatformDevice.cpp) diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp deleted file mode 100644 index 9f9e4388647..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDA platform internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#include "streamexecutor/Device.h" -#include "streamexecutor/Platform.h" -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" - -#include "llvm/Support/Mutex.h" - -#include "cuda.h" - -#include <map> - -namespace streamexecutor { -namespace cuda { - -static CUresult ensureCUDAInitialized() { -  static CUresult InitResult = []() { return cuInit(0); }(); -  return InitResult; -} - -size_t CUDAPlatform::getDeviceCount() const { -  if (ensureCUDAInitialized()) -    // TODO(jhen): Log an error. -    return 0; - -  int DeviceCount = 0; -  CUresult Result = cuDeviceGetCount(&DeviceCount); -  (void)Result; -  // TODO(jhen): Log an error. - -  return DeviceCount; -} - -Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) { -  if (CUresult InitResult = ensureCUDAInitialized()) -    return CUresultToError(InitResult, "cached cuInit return value"); - -  llvm::sys::ScopedLock Lock(Mutex); -  auto Iterator = PlatformDevices.find(DeviceIndex); -  if (Iterator == PlatformDevices.end()) { -    if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) { -      Iterator = -          PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first; -    } else { -      return MaybePDevice.takeError(); -    } -  } -  return Device(&Iterator->second); -} - -} // namespace cuda -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp deleted file mode 100644 index 5284a9a0a35..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDAPlatformDevice. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" -#include "streamexecutor/PlatformDevice.h" - -#include "cuda.h" - -namespace streamexecutor { -namespace cuda { - -static void *offset(const void *Base, size_t Offset) { -  return const_cast<char *>(static_cast<const char *>(Base) + Offset); -} - -Error CUresultToError(int CUResult, const llvm::Twine &Message) { -  CUresult Result = static_cast<CUresult>(CUResult); -  if (Result) { -    const char *ErrorName; -    if (cuGetErrorName(Result, &ErrorName)) -      ErrorName = "UNKNOWN ERROR NAME"; -    const char *ErrorString; -    if (cuGetErrorString(Result, &ErrorString)) -      ErrorString = "UNKNOWN ERROR DESCRIPTION"; -    return make_error("CUDA driver error: '" + Message + "', error code = " + -                      llvm::Twine(static_cast<int>(Result)) + ", name = " + -                      ErrorName + ", description = '" + ErrorString + "'"); -  } else -    return Error::success(); -} - -std::string CUDAPlatformDevice::getName() const { -  static std::string CachedName = [](int DeviceIndex) { -    static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024; -    std::string Name = "CUDA device " + std::to_string(DeviceIndex); -    char NameFromDriver[MAX_DRIVER_NAME_BYTES]; -    if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1, -                         DeviceIndex)) { -      NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0'; -      Name.append(": ").append(NameFromDriver); -    } -    return Name; -  }(DeviceIndex); -  return CachedName; -} - -Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) { -  CUdevice DeviceHandle; -  if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex)) -    return CUresultToError(Result, "cuDeviceGet"); - -  CUcontext ContextHandle; -  if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle)) -    return CUresultToError(Result, "cuDevicePrimaryCtxRetain"); - -  if (CUresult Result = cuCtxSetCurrent(ContextHandle)) -    return CUresultToError(Result, "cuCtxSetCurrent"); - -  return CUDAPlatformDevice(DeviceIndex); -} - -CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept -    : DeviceIndex(Other.DeviceIndex) { -  Other.DeviceIndex = -1; -} - -CUDAPlatformDevice &CUDAPlatformDevice:: -operator=(CUDAPlatformDevice &&Other) noexcept { -  DeviceIndex = Other.DeviceIndex; -  Other.DeviceIndex = -1; -  return *this; -} - -CUDAPlatformDevice::~CUDAPlatformDevice() { -  CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex); -  (void)Result; -  // TODO(jhen): Log error. -} - -Expected<const void *> -CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) { -  // TODO(jhen): Maybe first check loaded modules? -  if (!Spec.hasCUDAPTXInMemory()) -    return make_error("no CUDA code available to create kernel"); - -  CUdevice Device = static_cast<int>(DeviceIndex); -  int ComputeCapabilityMajor = 0; -  int ComputeCapabilityMinor = 0; -  if (CUresult Result = cuDeviceGetAttribute( -          &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, -          Device)) -    return CUresultToError( -        Result, -        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"); -  if (CUresult Result = cuDeviceGetAttribute( -          &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, -          Device)) -    return CUresultToError( -        Result, -        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"); -  const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor, -                                                       ComputeCapabilityMinor); - -  if (!Code) -    return make_error("no suitable CUDA source found for compute capability " + -                      llvm::Twine(ComputeCapabilityMajor) + "." + -                      llvm::Twine(ComputeCapabilityMinor)); - -  CUmodule Module; -  if (CUresult Result = cuModuleLoadData(&Module, Code)) -    return CUresultToError(Result, "cuModuleLoadData"); - -  CUfunction Function; -  if (CUresult Result = -          cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str())) -    return CUresultToError(Result, "cuModuleGetFunction"); - -  // TODO(jhen): Should I save this function pointer in case someone asks for -  // it again? - -  // TODO(jhen): Should I save the module pointer so I can unload it when I -  // destroy this device? - -  return static_cast<const void *>(Function); -} - -Error CUDAPlatformDevice::destroyKernel(const void *Handle) { -  // TODO(jhen): Maybe keep track of kernels for each module and unload the -  // module after they are all destroyed. -  return Error::success(); -} - -Expected<const void *> CUDAPlatformDevice::createStream() { -  CUstream Stream; -  if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT)) -    return CUresultToError(Result, "cuStreamCreate"); -  return Stream; -} - -Error CUDAPlatformDevice::destroyStream(const void *Handle) { -  return CUresultToError( -      cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))), -      "cuStreamDestroy"); -} - -Error CUDAPlatformDevice::launch( -    const void *PlatformStreamHandle, BlockDimensions BlockSize, -    GridDimensions GridSize, const void *PKernelHandle, -    const PackedKernelArgumentArrayBase &ArgumentArray) { -  CUfunction Function = -      reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle)); -  CUstream Stream = -      reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle)); - -  auto Launch = [Function, Stream, BlockSize, -                 GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) { -    return CUresultToError( -        cuLaunchKernel(Function,                              // -                       GridSize.X, GridSize.Y, GridSize.Z,    // -                       BlockSize.X, BlockSize.Y, BlockSize.Z, // -                       SharedMemoryBytes, Stream, ArgumentAddresses, nullptr), -        "cuLaunchKernel"); -  }; - -  void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses()); -  size_t SharedArgumentCount = ArgumentArray.getSharedCount(); -  if (SharedArgumentCount) { -    // The argument handling in this case is not very efficient. We may need to -    // come back and optimize it later. -    // -    // Perhaps introduce another branch for the case where there is exactly one -    // shared memory argument and it is the first one. This is the only case -    // that will be used for compiler-generated CUDA kernels, and OpenCL users -    // can choose to take advantage of it by combining their dynamic shared -    // memory arguments and putting them first in the kernel signature. -    unsigned SharedMemoryBytes = 0; -    size_t ArgumentCount = ArgumentArray.getArgumentCount(); -    llvm::SmallVector<void *, 16> NonSharedArgumentAddresses( -        ArgumentCount - SharedArgumentCount); -    size_t NonSharedIndex = 0; -    for (size_t I = 0; I < ArgumentCount; ++I) -      if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY) -        SharedMemoryBytes += ArgumentArray.getSize(I); -      else -        NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I]; -    return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data()); -  } -  return Launch(0, ArgumentAddresses); -} - -Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle, -                                  const void *DeviceSrcHandle, -                                  size_t SrcByteOffset, void *HostDst, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoHAsync( -          offset(HostDst, DstByteOffset), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount, -          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyDtoHAsync"); -} - -Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle, -                                  const void *HostSrc, size_t SrcByteOffset, -                                  const void *DeviceDstHandle, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyHtoDAsync( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          offset(HostSrc, SrcByteOffset), ByteCount, -          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyHtoDAsync"); -} - -Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle, -                                  const void *DeviceSrcHandle, -                                  size_t SrcByteOffset, -                                  const void *DeviceDstHandle, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoDAsync( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount, -          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyDtoDAsync"); -} - -Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) { -  return CUresultToError(cuStreamSynchronize(static_cast<CUstream>( -                             const_cast<void *>(PlatformStreamHandle))), -                         "cuStreamSynchronize"); -} - -Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) { -  CUdeviceptr Pointer; -  if (CUresult Result = cuMemAlloc(&Pointer, ByteCount)) -    return CUresultToError(Result, "cuMemAlloc"); -  return reinterpret_cast<void *>(Pointer); -} - -Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) { -  return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)), -                         "cuMemFree"); -} - -Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) { -  return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u), -                         "cuMemHostRegister"); -} - -Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) { -  return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)), -                         "cuMemHostUnregister"); -} - -Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle, -                                             size_t SrcByteOffset, -                                             void *HostDst, -                                             size_t DstByteOffset, -                                             size_t ByteCount) { -  return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset), -                                      reinterpret_cast<CUdeviceptr>(offset( -                                          DeviceSrcHandle, SrcByteOffset)), -                                      ByteCount), -                         "cuMemcpyDtoH"); -} - -Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc, -                                             size_t SrcByteOffset, -                                             const void *DeviceDstHandle, -                                             size_t DstByteOffset, -                                             size_t ByteCount) { -  return CUresultToError( -      cuMemcpyHtoD( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          offset(HostSrc, SrcByteOffset), ByteCount), -      "cuMemcpyHtoD"); -} - -Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle, -                                             size_t DstByteOffset, -                                             const void *DeviceSrcHandle, -                                             size_t SrcByteOffset, -                                             size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoD( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount), -      "cuMemcpyDtoD"); -} - -} // namespace cuda -} // namespace streamexecutor | 

