diff options
Diffstat (limited to 'parallel-libs/streamexecutor/include/streamexecutor/Executor.h')
-rw-r--r-- | parallel-libs/streamexecutor/include/streamexecutor/Executor.h | 307 |
1 files changed, 306 insertions, 1 deletions
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h b/parallel-libs/streamexecutor/include/streamexecutor/Executor.h index 0f0696279a3..ea4224eb30c 100644 --- a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h +++ b/parallel-libs/streamexecutor/include/streamexecutor/Executor.h @@ -16,12 +16,12 @@ #define STREAMEXECUTOR_EXECUTOR_H #include "streamexecutor/KernelSpec.h" +#include "streamexecutor/PlatformInterfaces.h" #include "streamexecutor/Utils/Error.h" namespace streamexecutor { class KernelInterface; -class PlatformExecutor; class Stream; class Executor { @@ -38,6 +38,311 @@ public: Expected<std::unique_ptr<Stream>> createStream(); + /// Allocates an array of ElementCount entries of type T in device memory. + template <typename T> + Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) { + return PExecutor->allocateDeviceMemory(ElementCount * sizeof(T)); + } + + /// Frees memory previously allocated with allocateDeviceMemory. + template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) { + return PExecutor->freeDeviceMemory(Memory); + } + + /// Allocates an array of ElementCount entries of type T in host memory. + /// + /// Host memory allocated by this function can be used for asynchronous memory + /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. + template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) { + return PExecutor->allocateHostMemory(ElementCount * sizeof(T)); + } + + /// Frees memory previously allocated with allocateHostMemory. + template <typename T> Error freeHostMemory(T *Memory) { + return PExecutor->freeHostMemory(Memory); + } + + /// Registers a previously allocated host array of type T for asynchronous + /// memory operations. + /// + /// Host memory registered by this function can be used for asynchronous + /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. 
+ template <typename T> + Error registerHostMemory(T *Memory, size_t ElementCount) { + return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T)); + } + + /// Unregisters host memory previously registered by registerHostMemory. + template <typename T> Error unregisterHostMemory(T *Memory) { + return PExecutor->unregisterHostMemory(Memory); + } + + /// Host-synchronously copies a slice of an array of elements of type T from + /// device to host memory. + /// + /// Returns an error if ElementCount is too large for the source slice or the + /// destination. + /// + /// The calling host thread is blocked until the copy completes. Can be used + /// with any host memory, the host memory does not have to be allocated with + /// allocateHostMemory or registered with registerHostMemory. Does not block + /// any ongoing device calls. + template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, + llvm::MutableArrayRef<T> Dst, size_t ElementCount) { + if (ElementCount > Src.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", from a device array of element count " + + llvm::Twine(Src.getElementCount())); + if (ElementCount > Dst.size()) + return make_error( + "copying too many elements, " + llvm::Twine(ElementCount) + + ", to a host array of element count " + llvm::Twine(Dst.size())); + return PExecutor->synchronousCopyD2H( + Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0, + ElementCount * sizeof(T)); + } + + /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, + /// llvm::MutableArrayRef<T>, size_t) but does not take an element count + /// argument because it copies the entire source array. + /// + /// Returns an error if the Src and Dst sizes do not match. 
+ template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, + llvm::MutableArrayRef<T> Dst) { + if (Src.getElementCount() != Dst.size()) + return make_error( + "array size mismatch for D2H, device source has element count " + + llvm::Twine(Src.getElementCount()) + + " but host destination has element count " + llvm::Twine(Dst.size())); + return synchronousCopyD2H(Src, Dst, Src.getElementCount()); + } + + /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, + /// llvm::MutableArrayRef<T>, size_t) but copies to a pointer rather than an + /// llvm::MutableArrayRef. + /// + /// Returns an error if ElementCount is too large for the source slice. + template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst, + size_t ElementCount) { + return synchronousCopyD2H(Src, llvm::MutableArrayRef<T>(Dst, ElementCount), + ElementCount); + } + + /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, + /// llvm::MutableArrayRef<T>, size_t) but the source is a GlobalDeviceMemory + /// rather than a GlobalDeviceMemorySlice. + template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, + llvm::MutableArrayRef<T> Dst, size_t ElementCount) { + return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); + } + + /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, + /// llvm::MutableArrayRef<T>) but the source is a GlobalDeviceMemory rather + /// than a GlobalDeviceMemorySlice. + template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, + llvm::MutableArrayRef<T> Dst) { + return synchronousCopyD2H(Src.asSlice(), Dst); + } + + /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, T*, size_t) but + /// the source is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice. 
+ template <typename T> + Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, T *Dst, + size_t ElementCount) { + return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); + } + + /// Host-synchronously copies a slice of an array of elements of type T from + /// host to device memory. + /// + /// Returns an error if ElementCount is too large for the source or the + /// destination. + /// + /// The calling host thread is blocked until the copy completes. Can be used + /// with any host memory, the host memory does not have to be allocated with + /// allocateHostMemory or registered with registerHostMemory. Does not block + /// any ongoing device calls. + template <typename T> + Error synchronousCopyH2D(llvm::ArrayRef<T> Src, + GlobalDeviceMemorySlice<T> Dst, + size_t ElementCount) { + if (ElementCount > Src.size()) + return make_error( + "copying too many elements, " + llvm::Twine(ElementCount) + + ", from a host array of element count " + llvm::Twine(Src.size())); + if (ElementCount > Dst.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", to a device array of element count " + + llvm::Twine(Dst.getElementCount())); + return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(), + Dst.getElementOffset() * sizeof(T), + ElementCount * sizeof(T)); + } + + /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count + /// argument because it copies the entire source array. + /// + /// Returns an error if the Src and Dst sizes do not match. 
+ template <typename T> + Error synchronousCopyH2D(llvm::ArrayRef<T> Src, + GlobalDeviceMemorySlice<T> Dst) { + if (Src.size() != Dst.getElementCount()) + return make_error( + "array size mismatch for H2D, host source has element count " + + llvm::Twine(Src.size()) + + " but device destination has element count " + + llvm::Twine(Dst.getElementCount())); + return synchronousCopyH2D(Src, Dst, Dst.getElementCount()); + } + + /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but copies from a pointer rather than + /// an llvm::ArrayRef. + /// + /// Returns an error if ElementCount is too large for the destination. + template <typename T> + Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst, + size_t ElementCount) { + return synchronousCopyH2D(llvm::ArrayRef<T>(Src, ElementCount), Dst, + ElementCount); + } + + /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a + /// GlobalDeviceMemory rather than a GlobalDeviceMemorySlice. + template <typename T> + Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst, + size_t ElementCount) { + return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); + } + + /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>, + /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory + /// rather than a GlobalDeviceMemorySlice. + template <typename T> + Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) { + return synchronousCopyH2D(Src, Dst.asSlice()); + } + + /// Similar to synchronousCopyH2D(T*, GlobalDeviceMemorySlice<T>, size_t) but + /// the destination is a GlobalDeviceMemory rather than a + /// GlobalDeviceMemorySlice. 
+ template <typename T> + Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> Dst, + size_t ElementCount) { + return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); + } + + /// Host-synchronously copies a slice of an array of elements of type T from + /// one location in device memory to another. + /// + /// Returns an error if ElementCount is too large for the source slice or the + /// destination. + /// + /// The calling host thread is blocked until the copy completes. Src and Dst + /// are both device memory, so no host buffer is involved and no host memory + /// allocation or registration is needed for this call. Does not block + /// any ongoing device calls. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, + GlobalDeviceMemorySlice<T> Dst, + size_t ElementCount) { + if (ElementCount > Src.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", from a device array of element count " + + llvm::Twine(Src.getElementCount())); + if (ElementCount > Dst.getElementCount()) + return make_error("copying too many elements, " + + llvm::Twine(ElementCount) + + ", to a device array of element count " + + llvm::Twine(Dst.getElementCount())); + return PExecutor->synchronousCopyD2D( + Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), + Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T), + ElementCount * sizeof(T)); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count + /// argument because it copies the entire source array. + /// + /// Returns an error if the Src and Dst sizes do not match. 
+ template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, + GlobalDeviceMemorySlice<T> Dst) { + if (Src.getElementCount() != Dst.getElementCount()) + return make_error( + "array size mismatch for D2D, device source has element count " + + llvm::Twine(Src.getElementCount()) + + " but device destination has element count " + + llvm::Twine(Dst.getElementCount())); + return synchronousCopyD2D(Src, Dst, Src.getElementCount()); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but the source is a + /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, + GlobalDeviceMemorySlice<T> Dst, + size_t ElementCount) { + return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>) but the source is a GlobalDeviceMemory<T> + /// rather than a GlobalDeviceMemorySlice<T>. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, + GlobalDeviceMemorySlice<T> Dst) { + return synchronousCopyD2D(Src.asSlice(), Dst); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a + /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, + GlobalDeviceMemory<T> Dst, size_t ElementCount) { + return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory<T> + /// rather than a GlobalDeviceMemorySlice<T>. 
+ template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, + GlobalDeviceMemory<T> Dst) { + return synchronousCopyD2D(Src, Dst.asSlice()); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>, size_t) but the source and destination are + /// both GlobalDeviceMemory<T> rather than GlobalDeviceMemorySlice<T>. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst, + size_t ElementCount) { + return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount); + } + + /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>, + /// GlobalDeviceMemorySlice<T>) but the source and destination are + /// both GlobalDeviceMemory<T> rather than GlobalDeviceMemorySlice<T>. + template <typename T> + Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, + GlobalDeviceMemory<T> Dst) { + return synchronousCopyD2D(Src.asSlice(), Dst.asSlice()); + } + private: PlatformExecutor *PExecutor; }; |