diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-10-02 17:48:54 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-10-02 17:48:54 +0000 |
commit | 9767089d003b52ec9d32b93c8533c815c9906902 (patch) | |
tree | e9788366c4929488c5cc4c31054fa4ebc749b0f9 /clang/lib/Driver/ToolChains/HIP.cpp | |
parent | 2b5259afb3ce81334fab0f4d7a741b85f465caf7 (diff) | |
download | bcm5719-llvm-9767089d003b52ec9d32b93c8533c815c9906902.tar.gz bcm5719-llvm-9767089d003b52ec9d32b93c8533c815c9906902.zip |
[HIP] Support early finalization of device code for -fno-gpu-rdc
This patch renames -f{no-}cuda-rdc to -f{no-}gpu-rdc and keeps the original
options as aliases. When -fgpu-rdc is off,
clang will assume the device code in each translation unit does not call
external functions except those in the device library, therefore it is possible
to compile the device code in each translation unit to self-contained kernels
and embed them in the host object, so that the host object behaves like
usual host object which can be linked by lld.
The benefits of this feature is: 1. allow users to create static libraries which
can be linked by host linker; 2. amortized device code linking time.
This patch modifies HIP action builder to insert actions for linking device
code and generating HIP fatbin, and pass HIP fatbin to host backend action.
It extracts code for constructing command for generating HIP fatbin as
a function so that it can be reused by early finalization. It also modifies
codegen of HIP host constructor functions to embed the device fatbin
when it is available.
Differential Revision: https://reviews.llvm.org/D52377
llvm-svn: 343611
Diffstat (limited to 'clang/lib/Driver/ToolChains/HIP.cpp')
-rw-r--r-- | clang/lib/Driver/ToolChains/HIP.cpp | 41 |
1 files changed, 39 insertions, 2 deletions
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 6efcfaee8fd..58e8e79420d 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -184,6 +184,40 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs)); } +// Construct a clang-offload-bundler command to bundle code objects for +// different GPU's into a HIP fat binary. +void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, + StringRef OutputFileName, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, const Tool& T) { + // Construct clang-offload-bundler command to bundle object files for + // for different GPU archs. + ArgStringList BundlerArgs; + BundlerArgs.push_back(Args.MakeArgString("-type=o")); + + // ToDo: Remove the dummy host binary entry which is required by + // clang-offload-bundler. + std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; + std::string BundlerInputArg = "-inputs=/dev/null"; + + for (const auto &II : Inputs) { + const auto* A = II.getAction(); + BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" + + StringRef(A->getOffloadingArch()).str(); + BundlerInputArg = BundlerInputArg + "," + II.getFilename(); + } + BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); + BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); + + auto BundlerOutputArg = + Args.MakeArgString(std::string("-outputs=").append(OutputFileName)); + BundlerArgs.push_back(BundlerOutputArg); + + SmallString<128> BundlerPath(C.getDriver().Dir); + llvm::sys::path::append(BundlerPath, "clang-offload-bundler"); + const char *Bundler = Args.MakeArgString(BundlerPath); + C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs)); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // object file. It calls llvm-link, opt, llc, then lld steps. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -192,6 +226,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const ArgList &Args, const char *LinkingOutput) const { + if (JA.getType() == types::TY_HIP_FATBIN) + return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); + assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && "Unsupported target"); @@ -244,9 +281,9 @@ void HIPToolChain::addClangTargetOptions( options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); - if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, + if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - CC1Args.push_back("-fcuda-rdc"); + CC1Args.push_back("-fgpu-rdc"); // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. |