summaryrefslogtreecommitdiffstats
path: root/clang/lib/Driver/Tools.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Driver/Tools.cpp')
-rw-r--r--clang/lib/Driver/Tools.cpp78
1 files changed, 78 insertions, 0 deletions
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index 5e4777b4666..e498f98355c 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -10625,3 +10625,81 @@ void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
else
ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
}
+
+void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const auto &TC =
+ static_cast<const toolchains::CudaToolChain &>(getToolChain());
+ assert(TC.getArch() == llvm::Triple::nvptx ||
+ TC.getArch() == llvm::Triple::nvptx64);
+
+ std::vector<std::string> gpu_archs =
+ Args.getAllArgValues(options::OPT_march_EQ);
+ assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas.");
+ const std::string& gpu_arch = gpu_archs[0];
+
+
+ ArgStringList CmdArgs;
+ CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
+
+ // Clang's default optimization level is -O0, but ptxas's default is -O3.
+ CmdArgs.push_back(Args.MakeArgString(
+ llvm::Twine("-O") +
+ Args.getLastArgValue(options::OPT_O_Group, "0").data()));
+
+ // Don't bother passing -g to ptxas: It's enabled by default at -O0, and
+ // not supported at other optimization levels.
+
+ CmdArgs.push_back("--gpu-name");
+ CmdArgs.push_back(Args.MakeArgString(gpu_arch));
+ CmdArgs.push_back("--output-file");
+ CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ for (const auto& II : Inputs)
+ CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
+ CmdArgs.push_back(Args.MakeArgString(A));
+
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
+
+// All inputs to this linker must be from CudaDeviceActions, as we need to look
+// at the Inputs' Actions in order to figure out which GPU architecture they
+// correspond to.
+void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const auto &TC =
+ static_cast<const toolchains::CudaToolChain &>(getToolChain());
+ assert(TC.getArch() == llvm::Triple::nvptx ||
+ TC.getArch() == llvm::Triple::nvptx64);
+
+ ArgStringList CmdArgs;
+ CmdArgs.push_back("--cuda");
+ CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
+ CmdArgs.push_back(Args.MakeArgString("--create"));
+ CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+
+ for (const auto& II : Inputs) {
+ auto* A = cast<const CudaDeviceAction>(II.getAction());
+ // We need to pass an Arch of the form "sm_XX" for cubin files and
+ // "compute_XX" for ptx.
+ const char *Arch = (II.getType() == types::TY_PP_Asm)
+ ? A->getComputeArchName()
+ : A->getGpuArchName();
+ CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
+ Arch + ",file=" + II.getFilename()));
+ }
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
+ CmdArgs.push_back(Args.MakeArgString(A));
+
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
OpenPOWER on IntegriCloud