summaryrefslogtreecommitdiffstats
path: root/parallel-libs/streamexecutor/include/streamexecutor/Executor.h
diff options
context:
space:
mode:
Diffstat (limited to 'parallel-libs/streamexecutor/include/streamexecutor/Executor.h')
-rw-r--r--parallel-libs/streamexecutor/include/streamexecutor/Executor.h307
1 files changed, 306 insertions, 1 deletions
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h b/parallel-libs/streamexecutor/include/streamexecutor/Executor.h
index 0f0696279a3..ea4224eb30c 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/Executor.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/Executor.h
@@ -16,12 +16,12 @@
#define STREAMEXECUTOR_EXECUTOR_H
#include "streamexecutor/KernelSpec.h"
+#include "streamexecutor/PlatformInterfaces.h"
#include "streamexecutor/Utils/Error.h"
namespace streamexecutor {
class KernelInterface;
-class PlatformExecutor;
class Stream;
class Executor {
@@ -38,6 +38,311 @@ public:
Expected<std::unique_ptr<Stream>> createStream();
+ /// Allocates an array of ElementCount entries of type T in device memory.
+ template <typename T>
+ Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
+ return PExecutor->allocateDeviceMemory(ElementCount * sizeof(T));
+ }
+
+ /// Frees memory previously allocated with allocateDeviceMemory.
+ template <typename T> Error freeDeviceMemory(GlobalDeviceMemory<T> Memory) {
+ return PExecutor->freeDeviceMemory(Memory);
+ }
+
+ /// Allocates an array of ElementCount entries of type T in host memory.
+ ///
+ /// Host memory allocated by this function can be used for asynchronous memory
+ /// copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
+ template <typename T> Expected<T *> allocateHostMemory(size_t ElementCount) {
+ return PExecutor->allocateHostMemory(ElementCount * sizeof(T));
+ }
+
+ /// Frees memory previously allocated with allocateHostMemory.
+ template <typename T> Error freeHostMemory(T *Memory) {
+ return PExecutor->freeHostMemory(Memory);
+ }
+
+ /// Registers a previously allocated host array of type T for asynchronous
+ /// memory operations.
+ ///
+ /// Host memory registered by this function can be used for asynchronous
+ /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
+ template <typename T>
+ Error registerHostMemory(T *Memory, size_t ElementCount) {
+ return PExecutor->registerHostMemory(Memory, ElementCount * sizeof(T));
+ }
+
+ /// Unregisters host memory previously registered by registerHostMemory.
+ template <typename T> Error unregisterHostMemory(T *Memory) {
+ return PExecutor->unregisterHostMemory(Memory);
+ }
+
+ /// Host-synchronously copies a slice of an array of elements of type T from
+ /// device to host memory.
+ ///
+ /// Returns an error if ElementCount is too large for the source slice or the
+ /// destination.
+ ///
+ /// The calling host thread is blocked until the copy completes. Can be used
+ /// with any host memory; the host memory does not have to be allocated with
+ /// allocateHostMemory or registered with registerHostMemory. Does not block
+ /// any ongoing device calls.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
+ llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
+ if (ElementCount > Src.getElementCount())
+ return make_error("copying too many elements, " +
+ llvm::Twine(ElementCount) +
+ ", from a device array of element count " +
+ llvm::Twine(Src.getElementCount()));
+ if (ElementCount > Dst.size())
+ return make_error(
+ "copying too many elements, " + llvm::Twine(ElementCount) +
+ ", to a host array of element count " + llvm::Twine(Dst.size()));
+ return PExecutor->synchronousCopyD2H(
+ Src.getBaseMemory(), Src.getElementOffset() * sizeof(T), Dst.data(), 0,
+ ElementCount * sizeof(T));
+ }
+
+ /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
+ /// llvm::MutableArrayRef<T>, size_t) but does not take an element count
+ /// argument because it copies the entire source array.
+ ///
+ /// Returns an error if the Src and Dst sizes do not match.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
+ llvm::MutableArrayRef<T> Dst) {
+ if (Src.getElementCount() != Dst.size())
+ return make_error(
+ "array size mismatch for D2H, device source has element count " +
+ llvm::Twine(Src.getElementCount()) +
+ " but host destination has element count " + llvm::Twine(Dst.size()));
+ return synchronousCopyD2H(Src, Dst, Src.getElementCount());
+ }
+
+ /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
+ /// llvm::MutableArrayRef<T>, size_t) but copies to a pointer rather than an
+ /// llvm::MutableArrayRef.
+ ///
+ /// Returns an error if ElementCount is too large for the source slice.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst,
+ size_t ElementCount) {
+ return synchronousCopyD2H(Src, llvm::MutableArrayRef<T>(Dst, ElementCount),
+ ElementCount);
+ }
+
+ /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
+ /// llvm::MutableArrayRef<T>, size_t) but the source is a GlobalDeviceMemory
+ /// rather than a GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
+ llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
+ return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
+ }
+
+ /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>,
+ /// llvm::MutableArrayRef<T>) but the source is a GlobalDeviceMemory rather
+ /// than a GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemory<T> Src,
+ llvm::MutableArrayRef<T> Dst) {
+ return synchronousCopyD2H(Src.asSlice(), Dst);
+ }
+
+ /// Similar to synchronousCopyD2H(GlobalDeviceMemorySlice<T>, T*, size_t) but
+ /// the source is a GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyD2H(GlobalDeviceMemory<T> Src, T *Dst,
+ size_t ElementCount) {
+ return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
+ }
+
+ /// Host-synchronously copies a slice of an array of elements of type T from
+ /// host to device memory.
+ ///
+ /// Returns an error if ElementCount is too large for the source or the
+ /// destination.
+ ///
+ /// The calling host thread is blocked until the copy completes. Can be used
+ /// with any host memory; the host memory does not have to be allocated with
+ /// allocateHostMemory or registered with registerHostMemory. Does not block
+ /// any ongoing device calls.
+ template <typename T>
+ Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
+ GlobalDeviceMemorySlice<T> Dst,
+ size_t ElementCount) {
+ if (ElementCount > Src.size())
+ return make_error(
+ "copying too many elements, " + llvm::Twine(ElementCount) +
+ ", from a host array of element count " + llvm::Twine(Src.size()));
+ if (ElementCount > Dst.getElementCount())
+ return make_error("copying too many elements, " +
+ llvm::Twine(ElementCount) +
+ ", to a device array of element count " +
+ llvm::Twine(Dst.getElementCount()));
+ return PExecutor->synchronousCopyH2D(Src.data(), 0, Dst.getBaseMemory(),
+ Dst.getElementOffset() * sizeof(T),
+ ElementCount * sizeof(T));
+ }
+
+ /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
+ /// argument because it copies the entire source array.
+ ///
+ /// Returns an error if the Src and Dst sizes do not match.
+ template <typename T>
+ Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
+ GlobalDeviceMemorySlice<T> Dst) {
+ if (Src.size() != Dst.getElementCount())
+ return make_error(
+ "array size mismatch for H2D, host source has element count " +
+ llvm::Twine(Src.size()) +
+ " but device destination has element count " +
+ llvm::Twine(Dst.getElementCount()));
+ return synchronousCopyH2D(Src, Dst, Dst.getElementCount());
+ }
+
+ /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but copies from a pointer rather than
+ /// an llvm::ArrayRef.
+ ///
+ /// Returns an error if ElementCount is too large for the destination.
+ template <typename T>
+ Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst,
+ size_t ElementCount) {
+ return synchronousCopyH2D(llvm::ArrayRef<T>(Src, ElementCount), Dst,
+ ElementCount);
+ }
+
+ /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
+ /// GlobalDeviceMemory rather than a GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst,
+ size_t ElementCount) {
+ return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
+ }
+
+ /// Similar to synchronousCopyH2D(llvm::ArrayRef<T>,
+ /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory
+ /// rather than a GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> Dst) {
+ return synchronousCopyH2D(Src, Dst.asSlice());
+ }
+
+ /// Similar to synchronousCopyH2D(T*, GlobalDeviceMemorySlice<T>, size_t) but
+ /// the destination is a GlobalDeviceMemory rather than a
+ /// GlobalDeviceMemorySlice.
+ template <typename T>
+ Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> Dst,
+ size_t ElementCount) {
+ return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
+ }
+
+ /// Host-synchronously copies a slice of an array of elements of type T from
+ /// one location in device memory to another.
+ ///
+ /// Returns an error if ElementCount is too large for the source slice or the
+ /// destination.
+ ///
+ /// The calling host thread is blocked until the copy completes. Both Src and
+ /// Dst are device memory, so no host buffer is involved and the
+ /// allocateHostMemory / registerHostMemory requirements do not apply. Does
+ /// not block any ongoing device calls.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
+ GlobalDeviceMemorySlice<T> Dst,
+ size_t ElementCount) {
+ if (ElementCount > Src.getElementCount())
+ return make_error("copying too many elements, " +
+ llvm::Twine(ElementCount) +
+ ", from a device array of element count " +
+ llvm::Twine(Src.getElementCount()));
+ if (ElementCount > Dst.getElementCount())
+ return make_error("copying too many elements, " +
+ llvm::Twine(ElementCount) +
+ ", to a device array of element count " +
+ llvm::Twine(Dst.getElementCount()));
+ return PExecutor->synchronousCopyD2D(
+ Src.getBaseMemory(), Src.getElementOffset() * sizeof(T),
+ Dst.getBaseMemory(), Dst.getElementOffset() * sizeof(T),
+ ElementCount * sizeof(T));
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but does not take an element count
+ /// argument because it copies the entire source array.
+ ///
+ /// Returns an error if the Src and Dst sizes do not match.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
+ GlobalDeviceMemorySlice<T> Dst) {
+ if (Src.getElementCount() != Dst.getElementCount())
+ return make_error(
+ "array size mismatch for D2D, device source has element count " +
+ llvm::Twine(Src.getElementCount()) +
+ " but device destination has element count " +
+ llvm::Twine(Dst.getElementCount()));
+ return synchronousCopyD2D(Src, Dst, Src.getElementCount());
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but the source is a
+ /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
+ GlobalDeviceMemorySlice<T> Dst,
+ size_t ElementCount) {
+ return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount);
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>) but the source is a GlobalDeviceMemory<T>
+ /// rather than a GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
+ GlobalDeviceMemorySlice<T> Dst) {
+ return synchronousCopyD2D(Src.asSlice(), Dst);
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but the destination is a
+ /// GlobalDeviceMemory<T> rather than a GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
+ GlobalDeviceMemory<T> Dst, size_t ElementCount) {
+ return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount);
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>) but the destination is a GlobalDeviceMemory<T>
+ /// rather than a GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
+ GlobalDeviceMemory<T> Dst) {
+ return synchronousCopyD2D(Src, Dst.asSlice());
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>, size_t) but the source and destination are
+ /// both GlobalDeviceMemory<T> rather than GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemory<T> Src, GlobalDeviceMemory<T> Dst,
+ size_t ElementCount) {
+ return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
+ }
+
+ /// Similar to synchronousCopyD2D(GlobalDeviceMemorySlice<T>,
+ /// GlobalDeviceMemorySlice<T>) but the source and destination are both
+ /// GlobalDeviceMemory<T> rather than GlobalDeviceMemorySlice<T>.
+ template <typename T>
+ Error synchronousCopyD2D(GlobalDeviceMemory<T> Src,
+ GlobalDeviceMemory<T> Dst) {
+ return synchronousCopyD2D(Src.asSlice(), Dst.asSlice());
+ }
+
private:
PlatformExecutor *PExecutor;
};
OpenPOWER on IntegriCloud