diff options
| author | Jason Henline <jhen@google.com> | 2016-09-13 23:29:25 +0000 |
|---|---|---|
| committer | Jason Henline <jhen@google.com> | 2016-09-13 23:29:25 +0000 |
| commit | b459eb352986891bb0ec4c146954c2627ed6fc8e (patch) | |
| tree | 0abd56e6e3175221fff80b632f0d895d93108fa2 /parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp | |
| parent | 1291166c110ecda7efe7be9a36aff04f6bc46611 (diff) | |
| download | bcm5719-llvm-b459eb352986891bb0ec4c146954c2627ed6fc8e.tar.gz bcm5719-llvm-b459eb352986891bb0ec4c146954c2627ed6fc8e.zip | |
[SE] KernelSpec return best PTX
Summary:
Before, the kernel spec would only return PTX for exactly the requested
compute capability. With this patch it will now return the PTX with the
largest compute capability that does not exceed the requested compute
capability.
Reviewers: jlebar
Subscribers: jprice, jlebar, parallel_libs-commits
Differential Revision: https://reviews.llvm.org/D24531
llvm-svn: 281417
Diffstat (limited to 'parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp')
| -rw-r--r-- | parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp index fc9eb549968..486a3504091 100644 --- a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp +++ b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp @@ -30,8 +30,9 @@ TEST(CUDAPTXInMemorySpec, SingleComputeCapability) { const char *PTXCodeString = "Dummy PTX code"; se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}}); EXPECT_EQ("KernelName", Spec.getKernelName()); + EXPECT_EQ(nullptr, Spec.getCode(0, 5)); EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0)); - EXPECT_EQ(nullptr, Spec.getCode(2, 0)); + EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0)); } TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) { @@ -40,9 +41,10 @@ TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) { se::CUDAPTXInMemorySpec Spec( "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}}); EXPECT_EQ("KernelName", Spec.getKernelName()); + EXPECT_EQ(nullptr, Spec.getCode(0, 5)); EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0)); EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0)); - EXPECT_EQ(nullptr, Spec.getCode(2, 0)); + EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0)); } TEST(CUDAFatbinInMemorySpec, BasicUsage) { @@ -89,8 +91,9 @@ TEST(MultiKernelLoaderSpec, Registration) { EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory()); EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName()); + EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5)); EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0)); - EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0)); + EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0)); EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName()); EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes()); |

