[CUDA] Invoke ptxas and fatbinary during compilation.

Summary: Previously we compiled CUDA device code to PTX assembly and embedded that asm as text in our host binary. Now we compile to PTX assembly and then invoke ptxas to assemble the PTX into a cubin file. We gather the ptx and cubin files for each of our --cuda-gpu-archs and combine them using fatbinary, and then embed that into the host binary. Adds two new command-line flags, -Xcuda_ptxas and -Xcuda_fatbinary, which pass args down to the external tools. Reviewers: tra, echristo Subscribers: cfe-commits, jhen Differential Revision: http://reviews.llvm.org/D16082 llvm-svn: 257809
author: Justin Lebar <jlebar@google.com> 2016-01-14 21:41:27 +0000
committer: Justin Lebar <jlebar@google.com> 2016-01-14 21:41:27 +0000
commit: 21e5d4fcfa4c8a2a198a968af5b7db1884b801b9 (patch)
tree: 678dba0dd9353eac0609f6eaaf55c43dbb6fc3cc /clang/lib/Driver/ToolChains.cpp
parent: b44f6fed4da5bc1d2983887f6ac933dd90f5c854 (diff)
download: bcm5719-llvm-21e5d4fcfa4c8a2a198a968af5b7db1884b801b9.tar.gz
bcm5719-llvm-21e5d4fcfa4c8a2a198a968af5b7db1884b801b9.zip
1 files changed, 20 insertions, 7 deletions
diff --git a/clang/lib/Driver/ToolChains.cpp b/clang/lib/Driver/ToolChains.cpp
index e3f25f1e32e..15b36778220 100644
--- a/clang/lib/Driver/ToolChains.cpp
+++ b/clang/lib/Driver/ToolChains.cpp
@@ -1652,13 +1652,14 @@ void Generic_GCC::CudaInstallationDetector::init(
       continue;
 
     CudaInstallPath = CudaPath;
+    CudaBinPath = CudaPath + "/bin";
     CudaIncludePath = CudaInstallPath + "/include";
     CudaLibDevicePath = CudaInstallPath + "/nvvm/libdevice";
     CudaLibPath =
         CudaInstallPath + (TargetTriple.isArch64Bit() ? "/lib64" : "/lib");
 
     if (!(D.getVFS().exists(CudaIncludePath) &&
-          D.getVFS().exists(CudaLibPath) &&
+          D.getVFS().exists(CudaBinPath) && D.getVFS().exists(CudaLibPath) &&
           D.getVFS().exists(CudaLibDevicePath)))
       continue;
 
@@ -4182,13 +4183,16 @@ Tool *DragonFly::buildLinker() const {
   return new tools::dragonfly::Linker(*this);
 }
 
-/// Stub for CUDA toolchain. At the moment we don't have assembler or
-/// linker and need toolchain mainly to propagate device-side options
-/// to CC1.
+/// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
+/// which isn't properly a linker but nonetheless performs the step of stitching
+/// together object files from the assembler into a single blob.
 
 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                              const ArgList &Args)
-    : Linux(D, Triple, Args) {}
+    : Linux(D, Triple, Args) {
+  if (CudaInstallation.isValid())
+    getProgramPaths().push_back(CudaInstallation.getBinPath());
+}
 
 void
 CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
@@ -4222,7 +4226,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
   for (Arg *A : Args) {
     if (A->getOption().matches(options::OPT_Xarch__)) {
       // Skip this argument unless the architecture matches BoundArch
-      if (A->getValue(0) != StringRef(BoundArch))
+      if (!BoundArch || A->getValue(0) != StringRef(BoundArch))
         continue;
 
       unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
@@ -4253,10 +4257,19 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
     DAL->append(A);
   }
 
-  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
+  if (BoundArch)
+    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
   return DAL;
 }
 
+Tool *CudaToolChain::buildAssembler() const {
+  return new tools::NVPTX::Assembler(*this);
+}
+
+Tool *CudaToolChain::buildLinker() const {
+  return new tools::NVPTX::Linker(*this);
+}
+
 /// XCore tool chain
 XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple,
                                const ArgList &Args)
author	Justin Lebar <jlebar@google.com>	2016-01-14 21:41:27 +0000
committer	Justin Lebar <jlebar@google.com>	2016-01-14 21:41:27 +0000
commit	21e5d4fcfa4c8a2a198a968af5b7db1884b801b9 (patch)
tree	678dba0dd9353eac0609f6eaaf55c43dbb6fc3cc /clang/lib/Driver/ToolChains.cpp
parent	b44f6fed4da5bc1d2983887f6ac933dd90f5c854 (diff)
download	bcm5719-llvm-21e5d4fcfa4c8a2a198a968af5b7db1884b801b9.tar.gz bcm5719-llvm-21e5d4fcfa4c8a2a198a968af5b7db1884b801b9.zip