summary | refs | log | tree | commit | diff | stats
path: root/parallel-libs/streamexecutor/unittests/CoreTests
diff options
context:
space:
mode:
Diffstat (limited to 'parallel-libs/streamexecutor/unittests/CoreTests')
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt 7
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp 414
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp 132
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp 150
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/SimpleHostPlatformDevice.h 148
-rw-r--r-- parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp 362
6 files changed, 1213 insertions, 0 deletions
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt b/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt
new file mode 100644
index 00000000000..3365dd07e76
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_se_unittest(
+ CoreTests # NOTE(review): presumably the test target name, with the .cpp files below as its sources -- confirm against add_se_unittest's definition.
+ DeviceTest.cpp
+ KernelSpecTest.cpp
+ PackedKernelArgumentArrayTest.cpp
+ StreamTest.cpp
+)
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp
new file mode 100644
index 00000000000..5b16c3c865c
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp
@@ -0,0 +1,414 @@
+//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for Device code.
+///
+//===----------------------------------------------------------------------===//
+
+#include <cstdlib>
+#include <cstring>
+#include <utility>
+#include <vector>
+
+#include "SimpleHostPlatformDevice.h"
+#include "streamexecutor/Device.h"
+#include "streamexecutor/PlatformDevice.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+/// File-local shorthand for SimpleHostPlatformDevice::getDeviceValue<int>,
+/// which reads the int at a given index of a GlobalDeviceMemory<int>.
+int (&getDeviceValue)(const se::GlobalDeviceMemory<int> &, size_t) =
+    se::test::SimpleHostPlatformDevice::getDeviceValue<int>;
+
+/// Fixture shared by all Device tests.
+///
+/// Owns a host-backed platform device, the Device wrapping it, four device
+/// buffers (two of five ints, two of seven ints) pre-filled from the host
+/// arrays of the same name, and two scratch host arrays.
+class DeviceTest : public ::testing::Test {
+public:
+  DeviceTest()
+      : Device(&PDevice),
+        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))),
+        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))),
+        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))),
+        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))) {
+    // Seed every device buffer with the contents of its matching host array.
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7));
+  }
+
+  se::test::SimpleHostPlatformDevice PDevice;
+  se::Device Device;
+
+  // Initial contents for the device buffers declared right below.
+  int HostA5[5] = {0, 1, 2, 3, 4};
+  int HostB5[5] = {5, 6, 7, 8, 9};
+  int HostA7[7] = {10, 11, 12, 13, 14, 15, 16};
+  int HostB7[7] = {17, 18, 19, 20, 21, 22, 23};
+  se::GlobalDeviceMemory<int> DeviceA5;
+  se::GlobalDeviceMemory<int> DeviceB5;
+  se::GlobalDeviceMemory<int> DeviceA7;
+  se::GlobalDeviceMemory<int> DeviceB7;
+
+  // Plain host arrays used as the host side of the copies under test.
+  int Host5[5] = {24, 25, 26, 27, 28};
+  int Host7[7] = {29, 30, 31, 32, 33, 34, 35};
+};
+
+#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
+#define EXPECT_ERROR(E) \
+ do { \
+ se::Error E__ = E; \
+ EXPECT_TRUE(static_cast<bool>(E__)); \
+ consumeError(std::move(E__)); \
+ } while (false)
+
+using llvm::ArrayRef;
+using llvm::MutableArrayRef;
+
+// The Device reports the name of the platform device it wraps.
+TEST_F(DeviceTest, GetName) {
+  const auto Name = Device.getName();
+  EXPECT_EQ(Name, "SimpleHostPlatformDevice");
+}
+
+TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
+ se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
+ Device.allocateDeviceMemory<int>(10);
+ EXPECT_TRUE(static_cast<bool>(MaybeMemory));
+}
+
+// Host memory can be allocated through the Device and then freed again.
+TEST_F(DeviceTest, AllocateAndFreeHostMemory) {
+  se::Expected<int *> MaybeMemory = Device.allocateHostMemory<int>(10);
+  // ASSERT rather than EXPECT: the next statement dereferences MaybeMemory,
+  // which is only valid when the allocation actually produced a value.
+  ASSERT_TRUE(static_cast<bool>(MaybeMemory));
+  EXPECT_NO_ERROR(Device.freeHostMemory(*MaybeMemory));
+}
+
+// Registering and then unregistering a ten-int host buffer both succeed.
+TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
+  std::vector<int> Buffer(10);
+  EXPECT_NO_ERROR(Device.registerHostMemory(Buffer.data(), 10));
+  EXPECT_NO_ERROR(Device.unregisterHostMemory(Buffer.data()));
+}
+
+// D2H tests
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
+  // Full copy: all five elements of DeviceA5 arrive in Host5.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
+
+  // Partial copy: only the first two elements of DeviceB5 are requested.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
+
+  // Seven elements do not fit in the five-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
+
+  // Seven elements are not available in the five-element source.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
+
+  // Seven elements exceed both source and destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
+  // Source and destination both hold five elements.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
+
+  // Seven-element source, five-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
+
+  // Five-element source, seven-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HToPointer) {
+  // Copy five elements to a raw host pointer.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
+
+  // Asking for seven elements from the five-element source is an error.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
+  // Elements 1..4 of DeviceA5 go to elements 1..4 of Host5.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(
+      DeviceA5.asSlice().drop_front(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
+  for (int Idx = 1; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
+
+  // Two elements out of a four-element slice of DeviceB5.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
+                                            MutableArrayRef<int>(Host5), 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(HostB5[Idx], Host5[Idx]);
+
+  // Count of seven is too large for the five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(),
+                                         MutableArrayRef<int>(Host5), 7));
+
+  // Count of seven is too large for the five-element source.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host7), 7));
+
+  // Count of seven is too large for both sides.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host5), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
+  // A five-element window of DeviceA7 starting at index 1 fills Host5.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
+                                            MutableArrayRef<int>(Host5)));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA7[Idx + 1], Host5[Idx]);
+
+  // Six-element slice, five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
+                                         MutableArrayRef<int>(Host5)));
+
+  // Five-element slice, seven-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
+                                         MutableArrayRef<int>(Host7)));
+}
+
+TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
+  // Elements 1..4 of DeviceA5 go to the raw pointer Host5 + 1.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice().drop_front(1),
+                                            Host5 + 1, 4));
+  for (int Idx = 1; Idx != 5; ++Idx)
+    EXPECT_EQ(HostA5[Idx], Host5[Idx]);
+
+  // Seven elements cannot come out of a five-element slice.
+  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
+}
+
+// H2D tests
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
+  // Full copy: all five elements of Host5 arrive in DeviceA5.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Partial copy: only the first two elements go to DeviceB5.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceB5, Idx), Host5[Idx]);
+
+  // Seven elements do not fit in the five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
+
+  // Seven elements are not available in the five-element source.
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
+
+  // Seven elements exceed both source and destination.
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
+  // Source and destination both hold five elements.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Five-element source, seven-element destination.
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
+
+  // Seven-element source, five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DToPointer) {
+  // Copy five elements from a raw host pointer.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Seven elements do not fit in the five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
+  // Elements 1..4 of Host5 go to elements 1..4 of DeviceA5.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+      ArrayRef<int>(Host5 + 1, 4), DeviceA5.asSlice().drop_front(1), 4));
+  for (int Idx = 1; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Two elements into a four-element slice of DeviceB5.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
+      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceB5, Idx), Host5[Idx]);
+
+  // Count of seven is too large for the five-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
+
+  // Count of seven is too large for the five-element source.
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
+
+  // Count of seven is too large for both sides.
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
+  // Five host elements into the five-element slice covering DeviceA5.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Five-element source, seven-element destination slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
+
+  // Seven-element source, five-element destination slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
+  // Five elements from a raw host pointer into a slice of DeviceA5.
+  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), Host5[Idx]);
+
+  // Seven elements do not fit in the five-element slice.
+  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
+}
+
+// D2D tests
+
+TEST_F(DeviceTest, SyncCopyD2DByCount) {
+  // Full copy between the two five-element buffers.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB5, Idx));
+
+  // Partial copy: first two elements of the seven-element buffers.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA7, Idx), getDeviceValue(DeviceB7, Idx));
+
+  // Seven elements exceed both five-element buffers.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
+
+  // Seven elements exceed the five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
+
+  // Seven elements exceed the five-element source.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2D) {
+  // Buffers of equal size copy cleanly.
+  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB5, Idx));
+
+  // Seven-element source, five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));
+
+  // Five-element source, seven-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
+  // Elements 1..4 of DeviceA5 go to elements 0..3 of DeviceB5.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4));
+  for (int Idx = 0; Idx != 4; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx + 1), getDeviceValue(DeviceB5, Idx));
+
+  // Two elements from a six-element slice of DeviceA7.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA7, Idx), getDeviceValue(DeviceB7, Idx));
+
+  // Seven elements exceed both five-element sides.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
+
+  // Seven elements exceed the five-element destination.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
+
+  // Seven elements exceed the five-element source slice.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2D) {
+  // The first five elements of DeviceA7 fill DeviceB5.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA7, Idx), getDeviceValue(DeviceB5, Idx));
+
+  // Six-element slice, five-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_front(1), DeviceB5));
+
+  // Four-element slice, seven-element destination.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
+  // All of DeviceA5 goes to elements 2..6 of DeviceB7.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB7, Idx + 2));
+
+  // Two elements into a four-element slice of DeviceB7.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA7, Idx), getDeviceValue(DeviceB7, Idx));
+
+  // Seven elements exceed both five-element sides.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
+
+  // Seven elements exceed the five-element destination slice.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
+
+  // Seven elements exceed the five-element source.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopyD2DSlice) {
+  // DeviceA5 fills the first five elements of DeviceB7.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB7, Idx));
+
+  // Seven-element source, five-element destination slice.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
+
+  // Five-element source, seven-element destination slice.
+  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
+  // Full copy between slices covering the five-element buffers.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB5, Idx));
+
+  // Partial copy: two elements between the seven-element slices.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
+  for (int Idx = 0; Idx != 2; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA7, Idx), getDeviceValue(DeviceB7, Idx));
+
+  // Seven elements exceed both five-element slices.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
+
+  // Seven elements exceed the five-element destination slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
+
+  // Seven elements exceed the five-element source slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
+}
+
+TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
+  // Slices of equal size copy cleanly.
+  EXPECT_NO_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
+  for (int Idx = 0; Idx != 5; ++Idx)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Idx), getDeviceValue(DeviceB5, Idx));
+
+  // Seven-element source slice, five-element destination slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
+
+  // Five-element source slice, seven-element destination slice.
+  EXPECT_ERROR(
+      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
+}
+
+} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
new file mode 100644
index 00000000000..fc9eb549968
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
@@ -0,0 +1,132 @@
+//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for the code in KernelSpec.
+///
+//===----------------------------------------------------------------------===//
+
+#include "streamexecutor/KernelSpec.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+// A spec built from an empty code list keeps its kernel name and returns no
+// code for the (1, 0) lookup.
+TEST(CUDAPTXInMemorySpec, NoCode) {
+  se::CUDAPTXInMemorySpec EmptySpec("KernelName", {});
+  EXPECT_EQ("KernelName", EmptySpec.getKernelName());
+  EXPECT_EQ(nullptr, EmptySpec.getCode(1, 0));
+}
+
+// With one (1, 0) entry registered, that lookup returns the registered
+// pointer and the (2, 0) lookup returns null.
+TEST(CUDAPTXInMemorySpec, SingleComputeCapability) {
+  const char *Code = "Dummy PTX code";
+  se::CUDAPTXInMemorySpec PTXSpec("KernelName", {{{1, 0}, Code}});
+  EXPECT_EQ("KernelName", PTXSpec.getKernelName());
+  EXPECT_EQ(Code, PTXSpec.getCode(1, 0));
+  EXPECT_EQ(nullptr, PTXSpec.getCode(2, 0));
+}
+
+// With (1, 0) and (3, 0) entries registered, each lookup returns its own
+// pointer and the unregistered (2, 0) lookup returns null.
+TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
+  const char *Code10 = "Dummy PTX code 10";
+  const char *Code30 = "Dummy PTX code 30";
+  se::CUDAPTXInMemorySpec PTXSpec("KernelName",
+                                  {{{1, 0}, Code10}, {{3, 0}, Code30}});
+  EXPECT_EQ("KernelName", PTXSpec.getKernelName());
+  EXPECT_EQ(Code10, PTXSpec.getCode(1, 0));
+  EXPECT_EQ(Code30, PTXSpec.getCode(3, 0));
+  EXPECT_EQ(nullptr, PTXSpec.getCode(2, 0));
+}
+
+// The fatbin spec hands back the kernel name and byte pointer it was given.
+TEST(CUDAFatbinInMemorySpec, BasicUsage) {
+  const char *Bytes = "Dummy fatbin bytes";
+  se::CUDAFatbinInMemorySpec FatbinSpec("KernelName", Bytes);
+  EXPECT_EQ("KernelName", FatbinSpec.getKernelName());
+  EXPECT_EQ(Bytes, FatbinSpec.getBytes());
+}
+
+// The OpenCL spec hands back the kernel name and text pointer it was given.
+TEST(OpenCLTextInMemorySpec, BasicUsage) {
+  const char *Text = "Dummy OpenCL text";
+  se::OpenCLTextInMemorySpec TextSpec("KernelName", Text);
+  EXPECT_EQ("KernelName", TextSpec.getKernelName());
+  EXPECT_EQ(Text, TextSpec.getText());
+}
+
+// A default-constructed loader spec reports no sub-specs, and each getter is
+// expected to die (in debug builds) with the "not present" message.
+TEST(MultiKernelLoaderSpec, NoCode) {
+  se::MultiKernelLoaderSpec Loader;
+  EXPECT_FALSE(Loader.hasCUDAPTXInMemory());
+  EXPECT_FALSE(Loader.hasCUDAFatbinInMemory());
+  EXPECT_FALSE(Loader.hasOpenCLTextInMemory());
+
+  EXPECT_DEBUG_DEATH(Loader.getCUDAPTXInMemory(),
+                     "getting spec that is not present");
+  EXPECT_DEBUG_DEATH(Loader.getCUDAFatbinInMemory(),
+                     "getting spec that is not present");
+  EXPECT_DEBUG_DEATH(Loader.getOpenCLTextInMemory(),
+                     "getting spec that is not present");
+}
+
+// After registering PTX, fatbin and OpenCL specs under one kernel name, every
+// sub-spec is present and returns exactly what was registered.
+TEST(MultiKernelLoaderSpec, Registration) {
+  se::MultiKernelLoaderSpec Loader;
+  const char *Name = "KernelName";
+  const char *PTXCode = "Dummy PTX code";
+  const char *Fatbin = "Dummy fatbin bytes";
+  const char *CLText = "Dummy OpenCL text";
+
+  Loader.addCUDAPTXInMemory(Name, {{{1, 0}, PTXCode}})
+      .addCUDAFatbinInMemory(Name, Fatbin)
+      .addOpenCLTextInMemory(Name, CLText);
+
+  EXPECT_TRUE(Loader.hasCUDAPTXInMemory());
+  EXPECT_TRUE(Loader.hasCUDAFatbinInMemory());
+  EXPECT_TRUE(Loader.hasOpenCLTextInMemory());
+
+  // PTX sub-spec.
+  EXPECT_EQ(Name, Loader.getCUDAPTXInMemory().getKernelName());
+  EXPECT_EQ(PTXCode, Loader.getCUDAPTXInMemory().getCode(1, 0));
+  EXPECT_EQ(nullptr, Loader.getCUDAPTXInMemory().getCode(2, 0));
+
+  // Fatbin sub-spec.
+  EXPECT_EQ(Name, Loader.getCUDAFatbinInMemory().getKernelName());
+  EXPECT_EQ(Fatbin, Loader.getCUDAFatbinInMemory().getBytes());
+
+  // OpenCL sub-spec.
+  EXPECT_EQ(Name, Loader.getOpenCLTextInMemory().getKernelName());
+  EXPECT_EQ(CLText, Loader.getOpenCLTextInMemory().getText());
+}
+
+// Adding the same kind of spec a second time is expected to die (in debug
+// builds) with the overwrite message.
+TEST(MultiKernelLoaderSpec, RegisterTwice) {
+  se::MultiKernelLoaderSpec Loader;
+  const char *Name = "KernelName";
+  const char *Fatbin = "Dummy fatbin bytes";
+
+  Loader.addCUDAFatbinInMemory(Name, Fatbin);
+
+  EXPECT_DEBUG_DEATH(Loader.addCUDAFatbinInMemory(Name, Fatbin),
+                     "illegal loader spec overwrite");
+}
+
+// Equal names held in different storage are accepted; a genuinely different
+// name is expected to die (in debug builds).
+TEST(MultiKernelLoaderSpec, ConflictingKernelNames) {
+  se::MultiKernelLoaderSpec Loader;
+  const char *NameLiteral = "KernelName";
+  std::string NameCopy = NameLiteral;
+  const char *PTXCode = "Dummy PTX code";
+  const char *Fatbin = "Dummy fatbin bytes";
+
+  // Same characters, different addresses: this must not count as a conflict.
+  Loader.addCUDAPTXInMemory(NameLiteral, {{{1, 0}, PTXCode}})
+      .addCUDAFatbinInMemory(NameCopy, Fatbin);
+
+  const char *DifferentName = "OtherKernelName";
+  const char *CLText = "Dummy OpenCL text";
+  EXPECT_DEBUG_DEATH(Loader.addOpenCLTextInMemory(DifferentName, CLText),
+                     "different kernel names in one MultiKernelLoaderSpec");
+}
+
+} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp
new file mode 100644
index 00000000000..dd6d0e1c655
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp
@@ -0,0 +1,150 @@
+//===-- PackedKernelArgumentArrayTest.cpp - tests for kernel arg packing --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Unit tests for kernel argument packing.
+///
+//===----------------------------------------------------------------------===//
+
+#include "SimpleHostPlatformDevice.h"
+#include "streamexecutor/Device.h"
+#include "streamexecutor/DeviceMemory.h"
+#include "streamexecutor/PackedKernelArgumentArray.h"
+#include "streamexecutor/PlatformDevice.h"
+
+#include "llvm/ADT/Twine.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+using Type = se::KernelArgumentType;
+
+// Test fixture class for testing argument packing.
+//
+// Holds one object of each kind the tests pack (a plain value, a typed global
+// device memory and a typed shared device memory) so they don't have to be
+// created separately in each test.
+class DeviceMemoryPackingTest : public ::testing::Test {
+public:
+  DeviceMemoryPackingTest()
+      : Device(&PDevice), Value(42), ElementCount(5),
+        TypedGlobal(getOrDie(Device.allocateDeviceMemory<int>(ElementCount))),
+        TypedShared(
+            se::SharedDeviceMemory<int>::makeFromElementCount(ElementCount)) {}
+
+  se::test::SimpleHostPlatformDevice PDevice;
+  se::Device Device;
+  // Plain value argument.
+  int Value;
+  // Number of int elements in both TypedGlobal and TypedShared.
+  size_t ElementCount;
+  se::GlobalDeviceMemory<int> TypedGlobal;
+  se::SharedDeviceMemory<int> TypedShared;
+};
+
+// Checks that the argument at position Index of Observed has the expected
+// address, size and type, through both the per-index getters and the
+// whole-array getters.
+template <typename... ParameterTs>
+static void
+ExpectEqual(const void *ExpectedAddress, size_t ExpectedSize, Type ExpectedType,
+            const se::PackedKernelArgumentArray<ParameterTs...> &Observed,
+            size_t Index) {
+  // Tag any failure below with the index being checked.
+  SCOPED_TRACE(("Index = " + llvm::Twine(Index)).str());
+
+  // Address.
+  EXPECT_EQ(ExpectedAddress, Observed.getAddress(Index));
+  EXPECT_EQ(ExpectedAddress, Observed.getAddresses()[Index]);
+
+  // Size.
+  EXPECT_EQ(ExpectedSize, Observed.getSize(Index));
+  EXPECT_EQ(ExpectedSize, Observed.getSizes()[Index]);
+
+  // Type.
+  EXPECT_EQ(ExpectedType, Observed.getType(Index));
+  EXPECT_EQ(ExpectedType, Observed.getTypes()[Index]);
+}
+
+// A plain value packs as its own address and size, with no shared arguments.
+TEST_F(DeviceMemoryPackingTest, SingleValue) {
+  auto Pack = se::make_kernel_argument_pack(Value);
+  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(0u, Pack.getSharedCount());
+}
+
+// Global device memory passed by reference packs as its handle, pointer-sized.
+TEST_F(DeviceMemoryPackingTest, SingleTypedGlobal) {
+  auto Pack = se::make_kernel_argument_pack(TypedGlobal);
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(0u, Pack.getSharedCount());
+}
+
+// Global device memory passed by pointer packs the same way as by reference.
+TEST_F(DeviceMemoryPackingTest, SingleTypedGlobalPointer) {
+  auto Pack = se::make_kernel_argument_pack(&TypedGlobal);
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(0u, Pack.getSharedCount());
+}
+
+// Global device memory passed by pointer-to-const packs as its handle too.
+TEST_F(DeviceMemoryPackingTest, SingleConstTypedGlobalPointer) {
+  const se::GlobalDeviceMemory<int> *ConstGlobal = &TypedGlobal;
+  auto Pack = se::make_kernel_argument_pack(ConstGlobal);
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(0u, Pack.getSharedCount());
+}
+
+// Shared device memory packs with a null address and its byte count as size,
+// and is counted as a shared argument.
+TEST_F(DeviceMemoryPackingTest, SingleTypedShared) {
+  auto Pack = se::make_kernel_argument_pack(TypedShared);
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(1u, Pack.getSharedCount());
+}
+
+// Shared device memory passed by pointer packs the same way as by reference.
+TEST_F(DeviceMemoryPackingTest, SingleTypedSharedPointer) {
+  auto Pack = se::make_kernel_argument_pack(&TypedShared);
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(1u, Pack.getSharedCount());
+}
+
+// Shared device memory passed by pointer-to-const packs the same way too.
+TEST_F(DeviceMemoryPackingTest, SingleConstTypedSharedPointer) {
+  const se::SharedDeviceMemory<int> *ConstShared = &TypedShared;
+  auto Pack = se::make_kernel_argument_pack(ConstShared);
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 0);
+  EXPECT_EQ(1u, Pack.getArgumentCount());
+  EXPECT_EQ(1u, Pack.getSharedCount());
+}
+
+// Packs one value, three spellings of global memory and three spellings of
+// shared memory together, then checks every slot and the totals.
+TEST_F(DeviceMemoryPackingTest, PackSeveralArguments) {
+  const se::GlobalDeviceMemory<int> *ConstGlobal = &TypedGlobal;
+  const se::SharedDeviceMemory<int> *ConstShared = &TypedShared;
+  auto Pack = se::make_kernel_argument_pack(Value, TypedGlobal, &TypedGlobal,
+                                            ConstGlobal, TypedShared,
+                                            &TypedShared, ConstShared);
+
+  // Slot 0: the plain value.
+  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Pack, 0);
+
+  // Slots 1-3: global memory by reference, pointer and pointer-to-const.
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 1);
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 2);
+  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+              Type::GLOBAL_DEVICE_MEMORY, Pack, 3);
+
+  // Slots 4-6: shared memory by reference, pointer and pointer-to-const.
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 4);
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 5);
+  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
+              Pack, 6);
+
+  EXPECT_EQ(7u, Pack.getArgumentCount());
+  EXPECT_EQ(3u, Pack.getSharedCount());
+}
+
+} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/SimpleHostPlatformDevice.h b/parallel-libs/streamexecutor/unittests/CoreTests/SimpleHostPlatformDevice.h
new file mode 100644
index 00000000000..5c5953098c4
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/SimpleHostPlatformDevice.h
@@ -0,0 +1,148 @@
+//===-- SimpleHostPlatformDevice.h - Host device for testing ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// The SimpleHostPlatformDevice class is a streamexecutor::PlatformDevice that
+/// is really just the host processor and memory. It is useful for testing
+/// because no extra device platform is required.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef STREAMEXECUTOR_LIB_UNITTESTS_SIMPLEHOSTPLATFORMDEVICE_H
+#define STREAMEXECUTOR_LIB_UNITTESTS_SIMPLEHOSTPLATFORMDEVICE_H
+
+#include <cstdlib>
+#include <cstring>
+
+#include "streamexecutor/PlatformDevice.h"
+
+namespace streamexecutor {
+namespace test {
+
+/// A streamexecutor::PlatformDevice that simply forwards all operations to the
+/// host platform.
+///
+/// Allocation and freeing go through std::malloc and std::free, every copy
+/// variant is a std::memcpy between byte-offset pointers, and host-memory
+/// registration is a no-op that reports success.
+class SimpleHostPlatformDevice : public streamexecutor::PlatformDevice {
+public:
+  std::string getName() const override { return "SimpleHostPlatformDevice"; }
+
+  /// Always returns a null stream handle.
+  streamexecutor::Expected<const void *> createStream() override {
+    return nullptr;
+  }
+
+  /// Returns ByteCount bytes obtained from std::malloc as the device handle.
+  ///
+  /// NOTE(review): a failed std::malloc is returned as a null handle, not as
+  /// an error.
+  streamexecutor::Expected<void *>
+  allocateDeviceMemory(size_t ByteCount) override {
+    return std::malloc(ByteCount);
+  }
+
+  /// Releases a handle with std::free.
+  streamexecutor::Error freeDeviceMemory(const void *Handle) override {
+    std::free(const_cast<void *>(Handle));
+    return streamexecutor::Error::success();
+  }
+
+  /// Returns ByteCount bytes obtained from std::malloc.
+  streamexecutor::Expected<void *>
+  allocateHostMemory(size_t ByteCount) override {
+    return std::malloc(ByteCount);
+  }
+
+  /// Releases host memory with std::free.
+  streamexecutor::Error freeHostMemory(void *Memory) override {
+    std::free(Memory);
+    return streamexecutor::Error::success();
+  }
+
+  /// No-op; always reports success.
+  streamexecutor::Error registerHostMemory(void *Memory,
+                                           size_t ByteCount) override {
+    return streamexecutor::Error::success();
+  }
+
+  /// No-op; always reports success.
+  streamexecutor::Error unregisterHostMemory(void *Memory) override {
+    return streamexecutor::Error::success();
+  }
+
+  /// Copies device bytes to host memory immediately; StreamHandle is ignored.
+  streamexecutor::Error copyD2H(const void *StreamHandle,
+                                const void *DeviceHandleSrc,
+                                size_t SrcByteOffset, void *HostDst,
+                                size_t DstByteOffset,
+                                size_t ByteCount) override {
+    return copyBytes(DeviceHandleSrc, SrcByteOffset, HostDst, DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Copies host bytes to device memory immediately; StreamHandle is ignored.
+  streamexecutor::Error copyH2D(const void *StreamHandle, const void *HostSrc,
+                                size_t SrcByteOffset,
+                                const void *DeviceHandleDst,
+                                size_t DstByteOffset,
+                                size_t ByteCount) override {
+    return copyBytes(HostSrc, SrcByteOffset,
+                     const_cast<void *>(DeviceHandleDst), DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Copies device bytes to device memory immediately; StreamHandle is
+  /// ignored.
+  streamexecutor::Error
+  copyD2D(const void *StreamHandle, const void *DeviceHandleSrc,
+          size_t SrcByteOffset, const void *DeviceHandleDst,
+          size_t DstByteOffset, size_t ByteCount) override {
+    return copyBytes(DeviceHandleSrc, SrcByteOffset,
+                     const_cast<void *>(DeviceHandleDst), DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Copies device bytes to host memory.
+  streamexecutor::Error synchronousCopyD2H(const void *DeviceHandleSrc,
+                                           size_t SrcByteOffset, void *HostDst,
+                                           size_t DstByteOffset,
+                                           size_t ByteCount) override {
+    return copyBytes(DeviceHandleSrc, SrcByteOffset, HostDst, DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Copies host bytes to device memory.
+  streamexecutor::Error synchronousCopyH2D(const void *HostSrc,
+                                           size_t SrcByteOffset,
+                                           const void *DeviceHandleDst,
+                                           size_t DstByteOffset,
+                                           size_t ByteCount) override {
+    return copyBytes(HostSrc, SrcByteOffset,
+                     const_cast<void *>(DeviceHandleDst), DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Copies device bytes to device memory.
+  streamexecutor::Error synchronousCopyD2D(const void *DeviceHandleSrc,
+                                           size_t SrcByteOffset,
+                                           const void *DeviceHandleDst,
+                                           size_t DstByteOffset,
+                                           size_t ByteCount) override {
+    return copyBytes(DeviceHandleSrc, SrcByteOffset,
+                     const_cast<void *>(DeviceHandleDst), DstByteOffset,
+                     ByteCount);
+  }
+
+  /// Gets the value at the given index from a GlobalDeviceMemory<T> instance
+  /// created by this class.
+  template <typename T>
+  static T getDeviceValue(const streamexecutor::GlobalDeviceMemory<T> &Memory,
+                          size_t Index) {
+    return static_cast<const T *>(Memory.getHandle())[Index];
+  }
+
+private:
+  /// Shared body of every copy method: std::memcpy of ByteCount bytes from
+  /// Src + SrcByteOffset to Dst + DstByteOffset, then success.
+  static streamexecutor::Error copyBytes(const void *Src, size_t SrcByteOffset,
+                                         void *Dst, size_t DstByteOffset,
+                                         size_t ByteCount) {
+    std::memcpy(static_cast<char *>(Dst) + DstByteOffset,
+                static_cast<const char *>(Src) + SrcByteOffset, ByteCount);
+    return streamexecutor::Error::success();
+  }
+};
+
+} // namespace test
+} // namespace streamexecutor
+
+#endif // STREAMEXECUTOR_LIB_UNITTESTS_SIMPLEHOSTPLATFORMDEVICE_H
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp
new file mode 100644
index 00000000000..65598540d67
--- /dev/null
+++ b/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp
@@ -0,0 +1,362 @@
+//===-- StreamTest.cpp - Tests for Stream ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the unit tests for Stream code.
+///
+//===----------------------------------------------------------------------===//
+
+#include <cstring>
+
+#include "SimpleHostPlatformDevice.h"
+#include "streamexecutor/Device.h"
+#include "streamexecutor/Kernel.h"
+#include "streamexecutor/KernelSpec.h"
+#include "streamexecutor/PlatformDevice.h"
+#include "streamexecutor/Stream.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+namespace se = ::streamexecutor;
+
+// File-local shorthand for the int instantiation of
+// SimpleHostPlatformDevice::getDeviceValue, used to inspect device memory.
+int (&getDeviceValue)(const se::GlobalDeviceMemory<int> &, size_t) =
+    se::test::SimpleHostPlatformDevice::getDeviceValue<int>;
+
+/// Fixture shared by every Stream test.
+///
+/// Owns the platform device, the Device and Stream built on top of it, host
+/// arrays with known contents, and device allocations that the constructor
+/// fills from the matching HostA*/HostB* arrays.
+class StreamTest : public ::testing::Test {
+protected:
+  // Mimics a platform whose stream handle is just a stream number.
+  int DummyPlatformStream;
+  se::test::SimpleHostPlatformDevice PDevice;
+  se::Device Device;
+  se::Stream Stream;
+
+  // Host-side mirrors of the initial contents of the device buffers below.
+  int HostA5[5];
+  int HostB5[5];
+  int HostA7[7];
+  int HostB7[7];
+
+  // Plain host buffers used as the host end of the copies under test.
+  int Host5[5];
+  int Host7[7];
+
+  // Device buffers; each starts out holding the values of its host mirror.
+  se::GlobalDeviceMemory<int> DeviceA5;
+  se::GlobalDeviceMemory<int> DeviceB5;
+  se::GlobalDeviceMemory<int> DeviceA7;
+  se::GlobalDeviceMemory<int> DeviceB7;
+
+public:
+  StreamTest()
+      : DummyPlatformStream(1), Device(&PDevice),
+        Stream(&PDevice, &DummyPlatformStream), HostA5{0, 1, 2, 3, 4},
+        HostB5{5, 6, 7, 8, 9}, HostA7{10, 11, 12, 13, 14, 15, 16},
+        HostB7{17, 18, 19, 20, 21, 22, 23}, Host5{24, 25, 26, 27, 28},
+        Host7{29, 30, 31, 32, 33, 34, 35},
+        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))),
+        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))),
+        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))),
+        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))) {
+    // Seed every device buffer from its host mirror; abort on any failure.
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7));
+    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7));
+  }
+};
+
+using llvm::ArrayRef;
+using llvm::MutableArrayRef;
+
+// D2H tests
+
+TEST_F(StreamTest, CopyD2HToMutableArrayRefByCount) {
+  // Full-length copy: every element of Host5 must match DeviceA5's mirror.
+  Stream.thenCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA5[Elem], Host5[Elem]);
+
+  // Partial copy: only the first two elements are checked.
+  Stream.thenCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 2; ++Elem)
+    EXPECT_EQ(HostB5[Elem], Host5[Elem]);
+
+  // Seven elements are requested but the host destination holds five; the
+  // stream is expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2HToMutableArrayRef) {
+  // Equal-sized source and destination: the copy is expected to succeed.
+  Stream.thenCopyD2H(DeviceA5, MutableArrayRef<int>(Host5));
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA5[Elem], Host5[Elem]);
+
+  // Five-element source with a seven-element destination: the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA5, MutableArrayRef<int>(Host7));
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2HToPointer) {
+  // Copy all five elements into a raw host pointer destination.
+  Stream.thenCopyD2H(DeviceA5, Host5, 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA5[Elem], Host5[Elem]);
+
+  // Seven elements are requested from a five-element device buffer; the
+  // stream is expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA5, Host7, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2HSliceToMutableArrayRefByCount) {
+  // Copy elements 1..4 of DeviceA5 into the same positions of Host5.
+  Stream.thenCopyD2H(DeviceA5.asSlice().drop_front(1),
+                     MutableArrayRef<int>(Host5 + 1, 4), 4);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 1; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA5[Elem], Host5[Elem]);
+
+  // Copy two elements from a slice of DeviceB5 with its last element dropped.
+  Stream.thenCopyD2H(DeviceB5.asSlice().drop_back(1),
+                     MutableArrayRef<int>(Host5), 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 2; ++Elem)
+    EXPECT_EQ(HostB5[Elem], Host5[Elem]);
+
+  // Seven elements are requested from a five-element slice; the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA5.asSlice(), MutableArrayRef<int>(Host7), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2HSliceToMutableArrayRef) {
+  // A five-element window of DeviceA7 starting at index 1 fills Host5.
+  Stream.thenCopyD2H(DeviceA7.asSlice().slice(1, 5),
+                     MutableArrayRef<int>(Host5));
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA7[Elem + 1], Host5[Elem]);
+
+  // Five-element slice with a seven-element destination: the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA5.asSlice(), MutableArrayRef<int>(Host7));
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2HSliceToPointer) {
+  // Copy elements 1..4 of DeviceA5 to the same positions of Host5 through a
+  // raw pointer destination.
+  Stream.thenCopyD2H(DeviceA5.asSlice().drop_front(1), Host5 + 1, 4);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 1; Elem < 5; ++Elem)
+    EXPECT_EQ(HostA5[Elem], Host5[Elem]);
+
+  // Seven elements are requested from a five-element slice; the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2H(DeviceA5.asSlice(), Host7, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+// H2D tests
+
+TEST_F(StreamTest, CopyH2DToArrayRefByCount) {
+  // Full-length copy: DeviceA5 must end up holding the contents of Host5.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Partial copy: only the first two elements of DeviceB5 are checked.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 2; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceB5, Elem), Host5[Elem]);
+
+  // Seven elements are requested but the device destination holds five; the
+  // stream is expected to report not-OK.
+  Stream.thenCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyH2DToArrayRef) {
+  // Equal-sized source and destination: the copy is expected to succeed.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceA5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Seven-element source with a five-element destination: the stream is
+  // expected to report not-OK.
+  Stream.thenCopyH2D(ArrayRef<int>(Host7), DeviceA5);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyH2DToPointer) {
+  // Copy five elements from a raw host pointer source.
+  Stream.thenCopyH2D(Host5, DeviceA5, 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Seven elements are requested but the device destination holds five; the
+  // stream is expected to report not-OK.
+  Stream.thenCopyH2D(Host7, DeviceA5, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyH2DSliceToArrayRefByCount) {
+  // Copy elements 1..4 of Host5 into the same positions of DeviceA5.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5 + 1, 4),
+                     DeviceA5.asSlice().drop_front(1), 4);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 1; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Copy two elements into a slice of DeviceB5 with its last element dropped.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 2; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceB5, Elem), Host5[Elem]);
+
+  // Seven elements are requested although both ends hold five; the stream is
+  // expected to report not-OK.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyH2DSliceToArrayRef) {
+  // Equal-sized host array and device slice: the copy is expected to succeed.
+  Stream.thenCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice());
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Seven-element source with a five-element slice: the stream is expected
+  // to report not-OK.
+  Stream.thenCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice());
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyH2DSliceToPointer) {
+  // Copy five elements from a raw host pointer into a full device slice.
+  Stream.thenCopyH2D(Host5, DeviceA5.asSlice(), 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int Elem = 0; Elem < 5; ++Elem)
+    EXPECT_EQ(getDeviceValue(DeviceA5, Elem), Host5[Elem]);
+
+  // Seven elements are requested but the device slice holds five; the stream
+  // is expected to report not-OK.
+  Stream.thenCopyH2D(Host7, DeviceA5.asSlice(), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+// D2D tests
+
+TEST_F(StreamTest, CopyD2DByCount) {
+  // Full-length copy DeviceA5 -> DeviceB5.
+  Stream.thenCopyD2D(DeviceA5, DeviceB5, 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB5, I));
+  }
+
+  // Partial copy DeviceA7 -> DeviceB7: only the first two elements.
+  Stream.thenCopyD2D(DeviceA7, DeviceB7, 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
+    EXPECT_EQ(HostA7[I], getDeviceValue(DeviceB7, I));
+  }
+
+  // Seven elements are requested but the destination holds five; the stream
+  // is expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA7, DeviceB5, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2D) {
+  // Whole-buffer copy DeviceA5 -> DeviceB5.
+  Stream.thenCopyD2D(DeviceA5, DeviceB5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB5, I));
+  }
+
+  // Seven-element source with a five-element destination: the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA7, DeviceB5);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopySliceD2DByCount) {
+  // Elements 1..4 of DeviceA5 go to elements 0..3 of DeviceB5.
+  Stream.thenCopyD2D(DeviceA5.asSlice().drop_front(1), DeviceB5, 4);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 4; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I + 1), getDeviceValue(DeviceB5, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I + 1], getDeviceValue(DeviceB5, I));
+  }
+
+  // Two elements from a slice of DeviceA7 with its last element dropped.
+  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
+    EXPECT_EQ(HostA7[I], getDeviceValue(DeviceB7, I));
+  }
+
+  // Seven elements are requested although both ends hold five; the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5, 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopySliceD2D) {
+  // The first five elements of DeviceA7 fill DeviceB5.
+  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB5, I));
+    // HostA7 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA7[I], getDeviceValue(DeviceB5, I));
+  }
+
+  // Four-element slice with a seven-element destination: the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2DSliceByCount) {
+  // DeviceA5 goes to elements 2..6 of DeviceB7.
+  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().drop_front(2), 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I + 2));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB7, I + 2));
+  }
+
+  // Two elements of DeviceA7 go to the front of a shortened DeviceB7 slice.
+  Stream.thenCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
+    EXPECT_EQ(HostA7[I], getDeviceValue(DeviceB7, I));
+  }
+
+  // Seven elements are requested but the source holds five; the stream is
+  // expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice(), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopyD2DSlice) {
+  // DeviceA5 fills the first five elements of DeviceB7.
+  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2));
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB7, I));
+  }
+
+  // Five-element source with a seven-element destination slice: the stream
+  // is expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice());
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopySliceD2DSliceByCount) {
+  // Full-length slice-to-slice copy DeviceA5 -> DeviceB5.
+  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB5, I));
+  }
+
+  // Partial slice-to-slice copy DeviceA7 -> DeviceB7: first two elements.
+  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2);
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 2; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
+    EXPECT_EQ(HostA7[I], getDeviceValue(DeviceB7, I));
+  }
+
+  // Seven elements are requested but the destination slice holds five; the
+  // stream is expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7);
+  EXPECT_FALSE(Stream.isOK());
+}
+
+TEST_F(StreamTest, CopySliceD2DSlice) {
+  // Whole slice-to-slice copy DeviceA5 -> DeviceB5.
+  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice());
+  EXPECT_TRUE(Stream.isOK());
+  for (int I = 0; I < 5; ++I) {
+    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
+    // HostA5 still holds the source's initial contents; checking against it
+    // catches a copy performed in the wrong direction.
+    EXPECT_EQ(HostA5[I], getDeviceValue(DeviceB5, I));
+  }
+
+  // Five-element source slice with a seven-element destination slice: the
+  // stream is expected to report not-OK.
+  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice());
+  EXPECT_FALSE(Stream.isOK());
+}
+
+} // namespace
OpenPOWER on IntegriCloud