summary refs log tree commit diff stats
path: root/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h
diff options
context:
space:
mode:
Diffstat (limited to 'parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h')
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h 278
1 files changed, 0 insertions, 278 deletions
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h b/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h
deleted file mode 100644
index 62f6e579933..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h
+++ /dev/null
@@ -1,278 +0,0 @@
-//===-- DeviceMemory.h - Types representing device memory -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file defines types that represent device memory buffers. Two memory
-/// spaces are represented here: global and shared. Host code can have a handle
-/// to device global memory, and that handle can be used to copy data to and
-/// from the device. Host code cannot have a handle to device shared memory
-/// because that memory only exists during the execution of a kernel.
-///
-/// GlobalDeviceMemory<T> is a handle to an array of elements of type T in
-/// global device memory. It is similar to a pair of a std::unique_ptr<T> and an
-/// element count to tell how many elements of type T fit in the memory pointed
-/// to by that T*.
-///
-/// SharedDeviceMemory<T> is just the size in elements of an array of elements
-/// of type T in device shared memory. No resources are actually attached to
-/// this class; it is just a memo telling the device to allocate space in
-/// shared memory.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_DEVICEMEMORY_H
-#define STREAMEXECUTOR_DEVICEMEMORY_H
-
-#include <cassert>
-#include <cstddef>
-
-#include "streamexecutor/Error.h"
-
-namespace streamexecutor {
-
-class Device;
-
-template <typename ElemT> class GlobalDeviceMemory;
-
-/// Reference to a slice of device memory.
-///
-/// Contains a base memory handle, an element count offset into that base
-/// memory, and an element count for the size of the slice. All offsets and
-/// sizes are measured in elements of type ElemT, not bytes.
-///
-/// The base memory is stored by reference, so a slice does not own anything
-/// and must not be used after the GlobalDeviceMemory it refers to is gone.
-template <typename ElemT> class GlobalDeviceMemorySlice {
-public:
-  using ElementTy = ElemT;
-
-  /// Creates a slice that covers every element of \p Memory.
-  ///
-  /// Intentionally implicit so GlobalDeviceMemory<T> can be passed to functions
-  /// expecting GlobalDeviceMemorySlice<T> arguments.
-  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &Memory)
-      : BaseMemory(Memory), ElementOffset(0),
-        ElementCount(Memory.getElementCount()) {}
-
-  /// Creates a slice of \p ElementCount elements that begins \p ElementOffset
-  /// elements into \p BaseMemory.
-  ///
-  /// Asserts that the requested range lies entirely inside \p BaseMemory.
-  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &BaseMemory,
-                          size_t ElementOffset, size_t ElementCount)
-      : BaseMemory(BaseMemory), ElementOffset(ElementOffset),
-        ElementCount(ElementCount) {
-    // Compare the count against the space left after the offset rather than
-    // summing offset and count: the sum can wrap around size_t and make an
-    // out-of-range slice look valid.
-    assert(ElementOffset <= BaseMemory.getElementCount() &&
-           ElementCount <= BaseMemory.getElementCount() - ElementOffset &&
-           "slicing past the end of a GlobalDeviceMemory buffer");
-  }
-
-  /// Gets the GlobalDeviceMemory backing this slice.
-  const GlobalDeviceMemory<ElemT> &getBaseMemory() const { return BaseMemory; }
-
-  /// Gets the offset of this slice from the base memory.
-  ///
-  /// The offset is measured in elements, not bytes.
-  size_t getElementOffset() const { return ElementOffset; }
-
-  /// Gets the number of elements in this slice.
-  size_t getElementCount() const { return ElementCount; }
-
-  /// Returns the number of bytes that can fit in this slice.
-  size_t getByteCount() const { return ElementCount * sizeof(ElemT); }
-
-  /// Creates a slice of the memory with the first DropCount elements removed.
-  ///
-  /// Asserts that \p DropCount does not exceed the size of this slice.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount) const {
-    assert(DropCount <= ElementCount &&
-           "dropping more than the size of a slice");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount,
-                                          ElementCount - DropCount);
-  }
-
-  /// Creates a slice of the memory with the last DropCount elements removed.
-  ///
-  /// Asserts that \p DropCount does not exceed the size of this slice.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> drop_back(size_t DropCount) const {
-    assert(DropCount <= ElementCount &&
-           "dropping more than the size of a slice");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset,
-                                          ElementCount - DropCount);
-  }
-
-  /// Creates a slice of the memory that chops off the first DropCount elements
-  /// and keeps the next TakeCount elements.
-  ///
-  /// Asserts that the requested sub-range lies entirely inside this slice.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount,
-                                       size_t TakeCount) const {
-    // Overflow-safe form of "DropCount + TakeCount <= ElementCount".
-    assert(DropCount <= ElementCount &&
-           TakeCount <= ElementCount - DropCount &&
-           "sub-slice operation overruns slice bounds");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount,
-                                          TakeCount);
-  }
-
-private:
-  const GlobalDeviceMemory<ElemT> &BaseMemory; // Not owned by the slice.
-  size_t ElementOffset; // First element of the slice within BaseMemory.
-  size_t ElementCount;  // Number of elements covered by the slice.
-};
-
-/// Wrapper around a generic global device memory allocation.
-///
-/// This class represents a buffer of untyped bytes in the global memory space
-/// of a device. See GlobalDeviceMemory<T> for the corresponding type that
-/// includes type information for the elements in its buffer.
-///
-/// This is effectively a pair consisting of an opaque handle and a buffer size
-/// in bytes. The opaque handle is a platform-dependent handle to the actual
-/// memory that is allocated on the device.
-///
-/// In some cases, such as in the CUDA platform, the opaque handle may actually
-/// be a pointer in the virtual address space and it may be valid to perform
-/// arithmetic on it to obtain other device pointers, but this is not the case
-/// in general.
-///
-/// For example, in the OpenCL platform, the handle is a pointer to a _cl_mem
-/// handle object which really is completely opaque to the user.
-///
-/// Instances are move-only: a handle is owned by exactly one object at a time.
-class GlobalDeviceMemoryBase {
-public:
-  /// Returns an opaque handle to the underlying memory.
-  const void *getHandle() const { return Handle; }
-
-  /// Returns the address of the opaque handle as stored by this object.
-  const void *const *getHandleAddress() const { return &Handle; }
-
-  // Cannot copy because the handle must be owned by a single object.
-  GlobalDeviceMemoryBase(const GlobalDeviceMemoryBase &) = delete;
-  GlobalDeviceMemoryBase &operator=(const GlobalDeviceMemoryBase &) = delete;
-
-protected:
-  /// Creates a GlobalDeviceMemoryBase from a handle and a byte count.
-  GlobalDeviceMemoryBase(Device *D, const void *Handle, size_t ByteCount)
-      : TheDevice(D), Handle(Handle), ByteCount(ByteCount) {}
-
-  /// Transfer ownership of the underlying handle.
-  ///
-  /// \p Other is left with a null device, a null handle and a zero byte count.
-  GlobalDeviceMemoryBase(GlobalDeviceMemoryBase &&Other) noexcept
-      : TheDevice(Other.TheDevice), Handle(Other.Handle),
-        ByteCount(Other.ByteCount) {
-    Other.TheDevice = nullptr;
-    Other.Handle = nullptr;
-    Other.ByteCount = 0;
-  }
-
-  /// Replaces this object's handle with the one owned by \p Other.
-  ///
-  /// \p Other is left with a null device, a null handle and a zero byte count.
-  /// Assigning an object to itself leaves it unchanged.
-  GlobalDeviceMemoryBase &operator=(GlobalDeviceMemoryBase &&Other) noexcept {
-    // Guard against self-assignment; without it the steps below would null
-    // out this object's own handle.
-    if (this != &Other) {
-      // Move the state held so far into a temporary so that it is destroyed
-      // at the end of this scope instead of being silently overwritten.
-      // NOTE(review): the destructor is defined out of line; presumably it
-      // releases the handle through TheDevice -- confirm.
-      GlobalDeviceMemoryBase Previous(
-          static_cast<GlobalDeviceMemoryBase &&>(*this));
-      TheDevice = Other.TheDevice;
-      Handle = Other.Handle;
-      ByteCount = Other.ByteCount;
-      Other.TheDevice = nullptr;
-      Other.Handle = nullptr;
-      Other.ByteCount = 0;
-    }
-    return *this;
-  }
-
-  ~GlobalDeviceMemoryBase();
-
-  Device *TheDevice; // Pointer to the device on which this memory lives.
-  const void *Handle; // Platform-dependent value representing allocated memory.
-  size_t ByteCount; // Size in bytes of this allocation.
-};
-
-/// Typed view of the untyped GlobalDeviceMemoryBase buffer.
-///
-/// GlobalDeviceMemory<int>, for instance, is a thin wrapper around
-/// GlobalDeviceMemoryBase that describes a buffer of integers stored in global
-/// device memory.
-template <typename ElemT>
-class GlobalDeviceMemory : public GlobalDeviceMemoryBase {
-public:
-  using ElementTy = ElemT;
-
-  // Movable; both operations are defaulted out of line below.
-  GlobalDeviceMemory(GlobalDeviceMemory &&) noexcept;
-  GlobalDeviceMemory &operator=(GlobalDeviceMemory &&) noexcept;
-
-  /// Returns how many elements of type ElemT make up this allocation.
-  size_t getElementCount() const {
-    const size_t ElementSize = sizeof(ElemT);
-    return ByteCount / ElementSize;
-  }
-
-  /// Returns the size of this memory buffer in bytes.
-  size_t getByteCount() const { return ByteCount; }
-
-  /// Returns a slice that spans this whole memory object.
-  GlobalDeviceMemorySlice<ElemT> asSlice() const {
-    GlobalDeviceMemorySlice<ElemT> Whole(*this);
-    return Whole;
-  }
-
-private:
-  // Not copyable.
-  GlobalDeviceMemory(const GlobalDeviceMemory &) = delete;
-  GlobalDeviceMemory &operator=(const GlobalDeviceMemory &) = delete;
-
-  // Construction is restricted to Device, which is the only friend.
-  friend Device;
-
-  /// Wraps \p Handle as a buffer of \p ElementCount elements of type ElemT on
-  /// device \p D; the size is handed to the base class in bytes.
-  GlobalDeviceMemory(Device *D, const void *Handle, size_t ElementCount)
-      : GlobalDeviceMemoryBase(D, Handle, ElementCount * sizeof(ElemT)) {}
-};
-
-// Defaulted move constructor.
-template <typename ElemT>
-GlobalDeviceMemory<ElemT>::GlobalDeviceMemory(
-    GlobalDeviceMemory<ElemT> &&) noexcept = default;
-
-// Defaulted move assignment operator.
-template <typename ElemT>
-GlobalDeviceMemory<ElemT> &GlobalDeviceMemory<ElemT>::
-operator=(GlobalDeviceMemory<ElemT> &&) noexcept = default;
-
-/// Describes the size of a dynamic shared memory buffer holding elements of
-/// type ElemT on a device.
-///
-/// Shared memory buffers live only on the device and cannot be touched from
-/// the host, so an instance carries no opaque handle, only a size.
-///
-/// OpenCL calls this kind of memory "local" and CUDA calls it "shared". On both
-/// platforms the host code knows only how large these buffers are and never
-/// holds a handle to them.
-///
-/// StreamExecutor treats shared memory the way OpenCL does: a kernel may take
-/// any number of shared memory sizes as kernel function arguments.
-///
-/// CUDA permits only one shared memory size argument per kernel call.
-/// StreamExecutor copes with this by accepting CUDA kernel signatures with
-/// several SharedDeviceMemory arguments and summing all of their sizes to
-/// obtain the single shared memory size used for the kernel launch.
-template <typename ElemT> class SharedDeviceMemory {
-public:
-  /// Builds a typed shared device memory description for \p ElementCount
-  /// elements.
-  static SharedDeviceMemory<ElemT> makeFromElementCount(size_t ElementCount) {
-    return SharedDeviceMemory<ElemT>{ElementCount};
-  }
-
-  /// Copying is allowed since the object is nothing more than an array size.
-  SharedDeviceMemory(const SharedDeviceMemory &) = default;
-
-  /// Copy assignment is allowed for the same reason.
-  SharedDeviceMemory &operator=(const SharedDeviceMemory &) = default;
-
-  /// Returns how many elements of type ElemT fit in this memory buffer.
-  size_t getElementCount() const { return ElementCount; }
-
-  /// Returns how many bytes fit in this memory buffer.
-  size_t getByteCount() const {
-    const size_t Elements = getElementCount();
-    return Elements * sizeof(ElemT);
-  }
-
-  /// Returns true when the buffer holds exactly one element.
-  bool isScalar() const { return ElementCount == 1; }
-
-private:
-  /// Builds a SharedDeviceMemory from a number of elements.
-  ///
-  /// Kept private because a bare size argument could be mistaken for a byte
-  /// count rather than an element count. Users go through the static method
-  /// makeFromElementCount, whose name states what the size parameter means.
-  explicit SharedDeviceMemory(size_t ElementCount)
-      : ElementCount(ElementCount) {}
-
-  size_t ElementCount;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_DEVICEMEMORY_H
OpenPOWER on IntegriCloud