diff options
Diffstat (limited to 'clang/lib/Driver/ToolChains/HIP.cpp')
-rw-r--r-- | clang/lib/Driver/ToolChains/HIP.cpp | 41 |
1 files changed, 39 insertions, 2 deletions
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 6efcfaee8fd..58e8e79420d 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -184,6 +184,40 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs)); } +// Construct a clang-offload-bundler command to bundle code objects for +// different GPUs into a HIP fat binary written to OutputFileName. +void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, + StringRef OutputFileName, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, const Tool& T) { + // Construct clang-offload-bundler command to bundle object files for + // different GPU archs. + ArgStringList BundlerArgs; + BundlerArgs.push_back(Args.MakeArgString("-type=o")); + + // ToDo: Remove the dummy host binary entry which is required by + // clang-offload-bundler. + std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; + std::string BundlerInputArg = "-inputs=/dev/null"; + + for (const auto &II : Inputs) { + const auto* A = II.getAction(); + BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" + + StringRef(A->getOffloadingArch()).str(); + BundlerInputArg = BundlerInputArg + "," + II.getFilename(); + } + BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); + BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); + + auto BundlerOutputArg = + Args.MakeArgString(std::string("-outputs=").append(OutputFileName)); + BundlerArgs.push_back(BundlerOutputArg); + + SmallString<128> BundlerPath(C.getDriver().Dir); + llvm::sys::path::append(BundlerPath, "clang-offload-bundler"); + const char *Bundler = Args.MakeArgString(BundlerPath); + C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs)); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // object file. 
It calls llvm-link, opt, llc, then lld steps. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -192,6 +226,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const ArgList &Args, const char *LinkingOutput) const { + if (JA.getType() == types::TY_HIP_FATBIN) + return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); + assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && "Unsupported target"); @@ -244,9 +281,9 @@ void HIPToolChain::addClangTargetOptions( options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); - if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, + if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - CC1Args.push_back("-fcuda-rdc"); + CC1Args.push_back("-fgpu-rdc"); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. |