From b459eb352986891bb0ec4c146954c2627ed6fc8e Mon Sep 17 00:00:00 2001 From: Jason Henline Date: Tue, 13 Sep 2016 23:29:25 +0000 Subject: [SE] KernelSpec return best PTX Summary: Before, the kernel spec would only return PTX for exactly the requested compute capability. With this patch it will now return the PTX with the largest compute capability that does not exceed the requested compute capability. Reviewers: jlebar Subscribers: jprice, jlebar, parallel_libs-commits Differential Revision: https://reviews.llvm.org/D24531 llvm-svn: 281417 --- .../streamexecutor/unittests/CoreTests/KernelSpecTest.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp') diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp index fc9eb549968..486a3504091 100644 --- a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp +++ b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp @@ -30,8 +30,9 @@ TEST(CUDAPTXInMemorySpec, SingleComputeCapability) { const char *PTXCodeString = "Dummy PTX code"; se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}}); EXPECT_EQ("KernelName", Spec.getKernelName()); + EXPECT_EQ(nullptr, Spec.getCode(0, 5)); EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0)); - EXPECT_EQ(nullptr, Spec.getCode(2, 0)); + EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0)); } TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) { @@ -40,9 +41,10 @@ TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) { se::CUDAPTXInMemorySpec Spec( "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}}); EXPECT_EQ("KernelName", Spec.getKernelName()); + EXPECT_EQ(nullptr, Spec.getCode(0, 5)); EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0)); EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0)); - EXPECT_EQ(nullptr, Spec.getCode(2, 0)); + EXPECT_EQ(PTXCodeString10, 
Spec.getCode(2, 0)); } TEST(CUDAFatbinInMemorySpec, BasicUsage) { @@ -89,8 +91,9 @@ TEST(MultiKernelLoaderSpec, Registration) { EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory()); EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName()); + EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5)); EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0)); - EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0)); + EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0)); EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName()); EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes()); -- cgit v1.2.3