summary | refs | log | tree | commit | diff | stats
path: root/parallel-libs/streamexecutor
diff options
context:
space:
mode:
Diffstat (limited to 'parallel-libs/streamexecutor')
-rw-r--r-- parallel-libs/streamexecutor/examples/CMakeLists.txt | 3
-rw-r--r-- parallel-libs/streamexecutor/examples/CUDASaxpy.cpp | 1
-rw-r--r-- parallel-libs/streamexecutor/examples/HostSaxpy.cpp | 94
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h | 18
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h | 8
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h | 56
-rw-r--r-- parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h | 151
-rw-r--r-- parallel-libs/streamexecutor/lib/PlatformManager.cpp | 3
8 files changed, 329 insertions, 5 deletions
diff --git a/parallel-libs/streamexecutor/examples/CMakeLists.txt b/parallel-libs/streamexecutor/examples/CMakeLists.txt
index 1d09a545429..cb061d5ca96 100644
--- a/parallel-libs/streamexecutor/examples/CMakeLists.txt
+++ b/parallel-libs/streamexecutor/examples/CMakeLists.txt
@@ -1,2 +1,5 @@
add_executable(cuda_saxpy_example CUDASaxpy.cpp)
target_link_libraries(cuda_saxpy_example streamexecutor)
+
+add_executable(host_saxpy_example HostSaxpy.cpp)
+target_link_libraries(host_saxpy_example streamexecutor)
diff --git a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp b/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp
index 5fb3dba26a7..0fce5ed046b 100644
--- a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp
+++ b/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp
@@ -17,7 +17,6 @@
#include <algorithm>
#include <cassert>
-#include <cstdio>
#include <cstdlib>
#include <vector>
diff --git a/parallel-libs/streamexecutor/examples/HostSaxpy.cpp b/parallel-libs/streamexecutor/examples/HostSaxpy.cpp
new file mode 100644
index 00000000000..525c4453b01
--- /dev/null
+++ b/parallel-libs/streamexecutor/examples/HostSaxpy.cpp
@@ -0,0 +1,94 @@
+//===-- HostSaxpy.cpp - Example of host saxpy with StreamExecutor API -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains example code demonstrating the usage of the
+/// StreamExecutor API for a host platform.
+///
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "streamexecutor/StreamExecutor.h"
+
+/// Host implementation of the saxpy kernel.
+///
+/// For every index I in [0, N) this stores A * X[I] + Y[I] into X[I]. Note
+/// that the result is written back into \p X; \p Y is only read.
+void Saxpy(float A, float *X, float *Y, size_t N) {
+ for (size_t I = 0; I < N; ++I)
+ X[I] = A * X[I] + Y[I];
+}
+
+namespace __compilergen {
+/// Kernel type for saxpy: a float scalar, two float device-memory arguments
+/// and an element count.
+using SaxpyKernel =
+    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>,
+                           streamexecutor::GlobalDeviceMemory<float>, size_t>;
+
+/// Adapts the type-erased array of argument addresses to a typed call to
+/// Saxpy.
+void SaxpyWrapper(const void *const *ArgumentAddresses) {
+  // Entries 0 and 3 are dereferenced to obtain the scalar values; entries 1
+  // and 2 are used directly as the float arrays.
+  const float Scale = *static_cast<const float *>(ArgumentAddresses[0]);
+  float *const XData =
+      static_cast<float *>(const_cast<void *>(ArgumentAddresses[1]));
+  float *const YData =
+      static_cast<float *>(const_cast<void *>(ArgumentAddresses[2]));
+  const size_t Count = *static_cast<const size_t *>(ArgumentAddresses[3]);
+  Saxpy(Scale, XData, YData, Count);
+}
+
+/// Builds the loader spec that registers the wrapper (not Saxpy itself) as
+/// the host function named "Saxpy".
+static streamexecutor::MultiKernelLoaderSpec makeSaxpyLoaderSpec() {
+  streamexecutor::MultiKernelLoaderSpec Spec;
+  Spec.addHostFunction("Saxpy", SaxpyWrapper);
+  return Spec;
+}
+
+static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec =
+    makeSaxpyLoaderSpec();
+} // namespace __compilergen
+
+/// Runs saxpy on the "host" platform through the StreamExecutor API and
+/// checks the values copied back into HostX.
+///
+/// \returns EXIT_SUCCESS when the result matches the expected values, and
+/// EXIT_FAILURE when the host platform reports no devices or the result is
+/// wrong. Errors from the StreamExecutor calls are handled by getOrDie and
+/// dieIfError.
+int main() {
+  namespace se = ::streamexecutor;
+  namespace cg = ::__compilergen;
+
+  // Create some host data.
+  float A = 42.0f;
+  std::vector<float> HostX = {0, 1, 2, 3};
+  std::vector<float> HostY = {4, 5, 6, 7};
+  size_t ArraySize = HostX.size();
+
+  // Get a device object.
+  se::Platform *Platform =
+      getOrDie(se::PlatformManager::getPlatformByName("host"));
+  if (Platform->getDeviceCount() == 0) {
+    // Report why the example is bailing out instead of failing silently.
+    std::fprintf(stderr, "no devices available on the host platform\n");
+    return EXIT_FAILURE;
+  }
+  se::Device *Device = getOrDie(Platform->getDevice(0));
+
+  // Load the kernel onto the device.
+  cg::SaxpyKernel Kernel =
+      getOrDie(Device->createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
+
+  se::RegisteredHostMemory<float> RegisteredX =
+      getOrDie(Device->registerHostMemory<float>(HostX));
+  se::RegisteredHostMemory<float> RegisteredY =
+      getOrDie(Device->registerHostMemory<float>(HostY));
+
+  // Allocate memory on the device.
+  se::GlobalDeviceMemory<float> X =
+      getOrDie(Device->allocateDeviceMemory<float>(ArraySize));
+  se::GlobalDeviceMemory<float> Y =
+      getOrDie(Device->allocateDeviceMemory<float>(ArraySize));
+
+  // Run operations on a stream.
+  se::Stream Stream = getOrDie(Device->createStream());
+  Stream.thenCopyH2D(RegisteredX, X)
+      .thenCopyH2D(RegisteredY, Y)
+      .thenLaunch(1, 1, Kernel, A, X, Y, ArraySize)
+      .thenCopyD2H(X, RegisteredX);
+  // Wait for the stream to complete.
+  se::dieIfError(Stream.blockHostUntilDone());
+
+  // Check the output data in HostX with an explicit comparison rather than
+  // assert, so the example still verifies its result when built with NDEBUG.
+  const std::vector<float> ExpectedX = {4, 47, 90, 133};
+  if (!std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin())) {
+    std::fprintf(stderr, "host saxpy produced unexpected results\n");
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
index c4b6722caf6..caf6f1bdc4f 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
@@ -65,11 +65,13 @@
#define STREAMEXECUTOR_KERNELSPEC_H
#include <cassert>
+#include <functional>
#include <map>
#include <memory>
#include <string>
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
namespace streamexecutor {
@@ -199,6 +201,9 @@ private:
/// than doing it by hand.
class MultiKernelLoaderSpec {
public:
+  /// Type of functions used as host platform kernels.
+  ///
+  /// The single parameter is the array of kernel argument addresses. This
+  /// must be exactly the std::function type that the host platform casts the
+  /// kernel handle back to when launching, so the pointee is
+  /// `const void *const` rather than `const void *`.
+  using HostFunctionTy = std::function<void(const void *const *)>;
+
std::string getKernelName() const {
if (TheKernelName)
return *TheKernelName;
@@ -215,6 +220,7 @@ public:
bool hasOpenCLTextInMemory() const {
return TheOpenCLTextInMemorySpec != nullptr;
}
+  /// Returns true if a host function has been stored in this spec.
+  bool hasHostFunction() const { return static_cast<bool>(HostFunction); }
// Accessors for platform variant kernel load specifications.
//
@@ -233,6 +239,11 @@ public:
return *TheOpenCLTextInMemorySpec;
}
+  /// Returns the stored host function. A host function must be present
+  /// (checked by assertion).
+  const HostFunctionTy &getHostFunction() const {
+    assert(HostFunction != nullptr && "getting spec that is not present");
+    return *HostFunction.get();
+  }
+
// Builder-pattern-like methods for use in initializing a
// MultiKernelLoaderSpec.
//
@@ -256,6 +267,12 @@ public:
MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
const char *OpenCLText);
+  /// Stores \p Function as the host platform implementation of the kernel
+  /// named \p KernelName.
+  ///
+  /// \returns a reference to this spec so calls can be chained.
+  MultiKernelLoaderSpec &addHostFunction(llvm::StringRef KernelName,
+                                         HostFunctionTy Function) {
+    // Previously KernelName was ignored, so a host-only spec never had its
+    // name recorded.
+    // NOTE(review): setKernelName is defined out of line; confirm it behaves
+    // here as it does for the other add* methods.
+    setKernelName(KernelName);
+    HostFunction = llvm::make_unique<HostFunctionTy>(std::move(Function));
+    return *this;
+  }
+
private:
void setKernelName(llvm::StringRef KernelName);
@@ -263,6 +280,7 @@ private:
std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
+ std::unique_ptr<HostFunctionTy> HostFunction;
};
} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h
index cc1ae405bbb..d55680dd58e 100644
--- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h
+++ b/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h
@@ -149,10 +149,10 @@ public:
/// Similar to synchronousCopyD2H(const void *, size_t, void
/// *, size_t, size_t), but copies memory from one location in device memory
/// to another rather than from device to host.
- virtual Error synchronousCopyD2D(const void *DeviceDstHandle,
- size_t DstByteOffset,
- const void *DeviceSrcHandle,
- size_t SrcByteOffset, size_t ByteCount) {
+ virtual Error synchronousCopyD2D(const void *DeviceSrcHandle,
+ size_t SrcByteOffset,
+ const void *DeviceDstHandle,
+ size_t DstByteOffset, size_t ByteCount) {
return make_error("synchronousCopyD2D not implemented for platform " +
getName());
}
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h
new file mode 100644
index 00000000000..52ad1ead5da
--- /dev/null
+++ b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h
@@ -0,0 +1,56 @@
+//===-- HostPlatform.h - Host platform subclass -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Declaration of the HostPlatform class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
+#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
+
+#include "HostPlatformDevice.h"
+#include "streamexecutor/Device.h"
+#include "streamexecutor/Platform.h"
+
+#include "llvm/Support/Mutex.h"
+
+namespace streamexecutor {
+namespace host {
+
+/// Platform whose work is carried out on the host itself instead of being
+/// offloaded to an accelerator.
+class HostPlatform : public Platform {
+public:
+  /// The host platform always reports exactly one device.
+  size_t getDeviceCount() const override { return 1; }
+
+  /// Returns the device for index 0, creating it on first use; any other
+  /// index produces an error.
+  Expected<Device *> getDevice(size_t DeviceIndex) override {
+    if (DeviceIndex == 0) {
+      // Creation of the device objects happens while holding Mutex.
+      llvm::sys::ScopedLock Guard(Mutex);
+      if (TheDevice == nullptr) {
+        ThePlatformDevice = llvm::make_unique<HostPlatformDevice>();
+        TheDevice = llvm::make_unique<Device>(ThePlatformDevice.get());
+      }
+      return TheDevice.get();
+    }
+    return make_error(
+        "Requested device index " + llvm::Twine(DeviceIndex) +
+        " from host platform which only supports device index 0");
+  }
+
+private:
+  llvm::sys::Mutex Mutex;
+  // ThePlatformDevice is created before TheDevice, which is handed a raw
+  // pointer to it.
+  std::unique_ptr<HostPlatformDevice> ThePlatformDevice;
+  std::unique_ptr<Device> TheDevice;
+};
+
+} // namespace host
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h
new file mode 100644
index 00000000000..e51552d1d2d
--- /dev/null
+++ b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h
@@ -0,0 +1,151 @@
+//===-- HostPlatformDevice.h - HostPlatformDevice class ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Declaration of the HostPlatformDevice class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
+#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
+
+#include <cstdlib>
+#include <cstring>
+
+#include "streamexecutor/PlatformDevice.h"
+
+namespace streamexecutor {
+namespace host {
+
+/// A concrete PlatformDevice subclass that performs its work on the host rather
+/// than offloading to an accelerator.
+///
+/// "Device" memory is ordinary memory obtained from std::malloc, kernels are
+/// host std::function objects, and every operation completes before the call
+/// returns.
+class HostPlatformDevice : public PlatformDevice {
+public:
+  std::string getName() const override { return "host"; }
+
+  /// Returns a handle to the host function stored in \p Spec, or an error if
+  /// \p Spec has no host function.
+  ///
+  /// The handle is the address of the function object owned by \p Spec, so
+  /// \p Spec must outlive every use of the handle.
+  Expected<const void *>
+  createKernel(const MultiKernelLoaderSpec &Spec) override {
+    if (!Spec.hasHostFunction()) {
+      return make_error("no host implementation available for kernel " +
+                        Spec.getKernelName());
+    }
+    return static_cast<const void *>(&Spec.getHostFunction());
+  }
+
+  /// Nothing to release: the kernel handle is owned by the loader spec.
+  Error destroyKernel(const void *Handle) override { return Error::success(); }
+
+  /// Returns this device as the stream handle; there is no per-stream state.
+  Expected<const void *> createStream() override {
+    // TODO(jhen): Do something with threads to allow multiple streams.
+    return this;
+  }
+
+  /// Nothing to release: the stream handle is this device.
+  Error destroyStream(const void *Handle) override { return Error::success(); }
+
+  /// Calls the host function behind \p PKernelHandle with the argument
+  /// addresses from \p ArgumentArray.
+  ///
+  /// Only block and grid dimensions of (1,1,1) are accepted; anything else
+  /// yields an error and the kernel is not run.
+  Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize,
+               GridDimensions GridSize, const void *PKernelHandle,
+               const PackedKernelArgumentArrayBase &ArgumentArray) override {
+    // TODO(jhen): Can we do something with BlockSize and GridSize?
+    if (!(BlockSize.X == 1 && BlockSize.Y == 1 && BlockSize.Z == 1)) {
+      return make_error(
+          "Block dimensions were (" + llvm::Twine(BlockSize.X) + "," +
+          llvm::Twine(BlockSize.Y) + "," + llvm::Twine(BlockSize.Z) +
+          "), but only size (1,1,1) is permitted for this platform");
+    }
+    if (!(GridSize.X == 1 && GridSize.Y == 1 && GridSize.Z == 1)) {
+      return make_error(
+          "Grid dimensions were (" + llvm::Twine(GridSize.X) + "," +
+          llvm::Twine(GridSize.Y) + "," + llvm::Twine(GridSize.Z) +
+          "), but only size (1,1,1) is permitted for this platform");
+    }
+
+    // The handle is the pointer produced by createKernel. The type named in
+    // this cast must be exactly the std::function type stored by
+    // MultiKernelLoaderSpec; keep the two in sync.
+    (*static_cast<const std::function<void(const void *const *)> *>(
+        PKernelHandle))(ArgumentArray.getAddresses());
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes from device memory to host memory. The copy is
+  /// finished when the call returns.
+  Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
+                size_t SrcByteOffset, void *HostDst, size_t DstByteOffset,
+                size_t ByteCount) override {
+    std::memcpy(offset(HostDst, DstByteOffset),
+                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes from host memory to device memory. The copy is
+  /// finished when the call returns.
+  Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
+                size_t SrcByteOffset, const void *DeviceDstHandle,
+                size_t DstByteOffset, size_t ByteCount) override {
+    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
+                offset(HostSrc, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes between two device memory locations.
+  Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
+                size_t SrcByteOffset, const void *DeviceDstHandle,
+                size_t DstByteOffset, size_t ByteCount) override {
+    // Source and destination may lie in the same allocation and overlap, which
+    // std::memcpy does not permit; std::memmove handles that case.
+    std::memmove(offset(DeviceDstHandle, DstByteOffset),
+                 offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+  Error blockHostUntilDone(const void *PlatformStreamHandle) override {
+    // All host operations are synchronous anyway.
+    return Error::success();
+  }
+
+  /// Allocates \p ByteCount bytes with std::malloc.
+  ///
+  /// \returns the allocation, or an error if a nonzero request could not be
+  /// satisfied.
+  Expected<void *> allocateDeviceMemory(size_t ByteCount) override {
+    void *Memory = std::malloc(ByteCount);
+    // A null result is only a failure for a nonzero request: std::malloc(0)
+    // is allowed to return a null pointer.
+    if (Memory == nullptr && ByteCount != 0) {
+      return make_error("failed to allocate " + llvm::Twine(ByteCount) +
+                        " bytes of host memory");
+    }
+    return Memory;
+  }
+
+  /// Releases memory obtained from allocateDeviceMemory.
+  Error freeDeviceMemory(const void *Handle) override {
+    std::free(const_cast<void *>(Handle));
+    return Error::success();
+  }
+
+  /// No-op: this implementation does nothing to register host memory.
+  Error registerHostMemory(void *Memory, size_t ByteCount) override {
+    return Error::success();
+  }
+
+  /// No-op counterpart of registerHostMemory.
+  Error unregisterHostMemory(const void *Memory) override {
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes from device memory to host memory.
+  Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset,
+                           void *HostDst, size_t DstByteOffset,
+                           size_t ByteCount) override {
+    std::memcpy(offset(HostDst, DstByteOffset),
+                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes from host memory to device memory.
+  Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
+                           const void *DeviceDstHandle, size_t DstByteOffset,
+                           size_t ByteCount) override {
+    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
+                offset(HostSrc, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+  /// Copies \p ByteCount bytes between two device memory locations.
+  Error synchronousCopyD2D(const void *DeviceSrcHandle, size_t SrcByteOffset,
+                           const void *DeviceDstHandle, size_t DstByteOffset,
+                           size_t ByteCount) override {
+    // As in copyD2D, the ranges may overlap, so std::memmove is required.
+    std::memmove(offset(DeviceDstHandle, DstByteOffset),
+                 offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
+    return Error::success();
+  }
+
+private:
+  /// Returns \p Base advanced by \p Offset bytes, as a mutable pointer.
+  static void *offset(const void *Base, size_t Offset) {
+    return const_cast<char *>(static_cast<const char *>(Base) + Offset);
+  }
+};
+
+} // namespace host
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp
index 9cae5b1ea4b..7304cca755c 100644
--- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp
+++ b/parallel-libs/streamexecutor/lib/PlatformManager.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "streamexecutor/PlatformManager.h"
+#include "streamexecutor/platforms/host/HostPlatform.h"
namespace streamexecutor {
@@ -23,6 +24,8 @@ PlatformManager::PlatformManager() {
// appropriate code to include here.
// * Use static initialization tricks to have platform libraries register
// themselves when they are loaded.
+
+ PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>());
}
Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) {
OpenPOWER on IntegriCloud