diff options
Diffstat (limited to 'clang/lib/Driver/Tools.cpp')
-rw-r--r-- | clang/lib/Driver/Tools.cpp | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp index 5e4777b4666..e498f98355c 100644 --- a/clang/lib/Driver/Tools.cpp +++ b/clang/lib/Driver/Tools.cpp @@ -10625,3 +10625,81 @@ void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA, else ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput); } + +void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const auto &TC = + static_cast<const toolchains::CudaToolChain &>(getToolChain()); + assert(TC.getArch() == llvm::Triple::nvptx || + TC.getArch() == llvm::Triple::nvptx64); + + std::vector<std::string> gpu_archs = + Args.getAllArgValues(options::OPT_march_EQ); + assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas."); + const std::string& gpu_arch = gpu_archs[0]; + + + ArgStringList CmdArgs; + CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); + + // Clang's default optimization level is -O0, but ptxas's default is -O3. + CmdArgs.push_back(Args.MakeArgString( + llvm::Twine("-O") + + Args.getLastArgValue(options::OPT_O_Group, "0").data())); + + // Don't bother passing -g to ptxas: It's enabled by default at -O0, and + // not supported at other optimization levels. + + CmdArgs.push_back("--gpu-name"); + CmdArgs.push_back(Args.MakeArgString(gpu_arch)); + CmdArgs.push_back("--output-file"); + CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); + for (const auto& II : Inputs) + CmdArgs.push_back(Args.MakeArgString(II.getFilename())); + + for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) + CmdArgs.push_back(Args.MakeArgString(A)); + + const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas")); + C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); +} + +// All inputs to this linker must be from CudaDeviceActions, as we need to look +// at the Inputs' Actions in order to figure out which GPU architecture they +// correspond to. +void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const auto &TC = + static_cast<const toolchains::CudaToolChain &>(getToolChain()); + assert(TC.getArch() == llvm::Triple::nvptx || + TC.getArch() == llvm::Triple::nvptx64); + + ArgStringList CmdArgs; + CmdArgs.push_back("--cuda"); + CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32"); + CmdArgs.push_back(Args.MakeArgString("--create")); + CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); + + for (const auto& II : Inputs) { + auto* A = cast<const CudaDeviceAction>(II.getAction()); + // We need to pass an Arch of the form "sm_XX" for cubin files and + // "compute_XX" for ptx. + const char *Arch = (II.getType() == types::TY_PP_Asm) + ? A->getComputeArchName() + : A->getGpuArchName(); + CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + + Arch + ",file=" + II.getFilename())); + } + + for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) + CmdArgs.push_back(Args.MakeArgString(A)); + + const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary")); + C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); +} |