summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author: Sergey Dmitriev <serguei.n.dmitriev@intel.com> 2019-10-09 20:42:58 +0000
committer: Sergey Dmitriev <serguei.n.dmitriev@intel.com> 2019-10-09 20:42:58 +0000
commit: a0d83768f10849e5cf230391fac949dc5118c0a6 (patch)
tree: 8a882a19d1a47f42867913b64e648740579d0669 /clang/lib
parent: 868b47fcbef1a116adf6668f24c2c3fcbfd57701 (diff)
download: bcm5719-llvm-a0d83768f10849e5cf230391fac949dc5118c0a6.tar.gz
download: bcm5719-llvm-a0d83768f10849e5cf230391fac949dc5118c0a6.zip
[Clang][OpenMP Offload] Add new tool for wrapping offload device binaries
This patch removes the remaining part of the OpenMP offload linker scripts, which was used for inserting device binaries into the output linked binary. Device binaries are now inserted into the host binary with the help of a wrapper bit-code file which contains the device binaries as data. The wrapper bit-code file is dynamically created by the clang driver with the help of a new tool, clang-offload-wrapper, which takes device binaries as input and produces a bit-code file with the required contents. The wrapper bit-code is then compiled to an object, and the resulting object is appended to the host link by the clang driver. This is the second part of the patch for eliminating the OpenMP linker script (please see https://reviews.llvm.org/D64943). Differential Revision: https://reviews.llvm.org/D68166 llvm-svn: 374219
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/Driver/Action.cpp 8
-rw-r--r-- clang/lib/Driver/Driver.cpp 57
-rw-r--r-- clang/lib/Driver/ToolChain.cpp 9
-rw-r--r-- clang/lib/Driver/ToolChains/Clang.cpp 54
-rw-r--r-- clang/lib/Driver/ToolChains/Clang.h 14
-rw-r--r-- clang/lib/Driver/ToolChains/CommonArgs.cpp 114
-rw-r--r-- clang/lib/Driver/ToolChains/CommonArgs.h 7
-rw-r--r-- clang/lib/Driver/ToolChains/Cuda.cpp 2
-rw-r--r-- clang/lib/Driver/ToolChains/Gnu.cpp 5
9 files changed, 128 insertions, 142 deletions
diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp
index 0187cf981eb..0eb4c7257e7 100644
--- a/clang/lib/Driver/Action.cpp
+++ b/clang/lib/Driver/Action.cpp
@@ -41,6 +41,8 @@ const char *Action::getClassName(ActionClass AC) {
return "clang-offload-bundler";
case OffloadUnbundlingJobClass:
return "clang-offload-unbundler";
+ case OffloadWrapperJobClass:
+ return "clang-offload-wrapper";
}
llvm_unreachable("invalid class");
@@ -407,3 +409,9 @@ void OffloadUnbundlingJobAction::anchor() {}
OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
: JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
+
+void OffloadWrapperJobAction::anchor() {}
+
+OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs,
+ types::ID Type)
+ : JobAction(OffloadWrapperJobClass, Inputs, Type) {}
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index a0952742982..51c262aa0a3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2288,6 +2288,9 @@ class OffloadingActionBuilder final {
virtual void appendTopLevelActions(ActionList &AL) {}
/// Append linker actions generated by the builder.
+ virtual void appendLinkActions(ActionList &AL) {}
+
+ /// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
/// Initialize the builder. Return true if any initialization errors are
@@ -2890,7 +2893,7 @@ class OffloadingActionBuilder final {
OpenMPDeviceActions.clear();
}
- void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+ void appendLinkActions(ActionList &AL) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
@@ -2899,12 +2902,18 @@ class OffloadingActionBuilder final {
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
- DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
- Action::OFK_OpenMP);
+ OffloadAction::DeviceDependences DeviceLinkDeps;
+ DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
+ Action::OFK_OpenMP);
+ AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
+ DeviceLinkAction->getType()));
++TC;
}
+ DeviceLinkerInputs.clear();
}
+ void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
+
bool initialize() override {
// Get the OpenMP toolchains. If we don't get any, the action builder will
// know there is nothing to do related to OpenMP offloading.
@@ -3129,6 +3138,25 @@ public:
return false;
}
+ Action* makeHostLinkAction() {
+ // Build a list of device linking actions.
+ ActionList DeviceAL;
+ for (DeviceActionBuilder *SB : SpecializedBuilders) {
+ if (!SB->isValid())
+ continue;
+ SB->appendLinkActions(DeviceAL);
+ }
+
+ if (DeviceAL.empty())
+ return nullptr;
+
+ // Create wrapper bitcode from the result of device link actions and compile
+ // it to an object which will be added to the host link command.
+ auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
+ auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
+ return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
+ }
+
/// Processes the host linker action. This currently consists of replacing it
/// with an offload action if there are device link objects and propagate to
/// the host action all the offload kinds used in the current compilation. The
@@ -3434,6 +3462,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Add a link action if necessary.
if (!LinkerInputs.empty()) {
+ if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
+ LinkerInputs.push_back(Wrapper);
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
LA = OffloadBuilder.processHostLinkAction(LA);
Actions.push_back(LA);
@@ -3820,18 +3850,8 @@ class ToolSelector final {
if (!AJ || !BJ)
return nullptr;
- // Retrieve the compile job, backend action must always be preceded by one.
- ActionList CompileJobOffloadActions;
- auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
- /*CanBeCollapsed=*/false);
- if (!AJ || !BJ || !CJ)
- return nullptr;
-
- assert(isa<CompileJobAction>(CJ) &&
- "Expecting compile job preceding backend job.");
-
- // Get compiler tool.
- const Tool *T = TC.SelectTool(*CJ);
+ // Get backend tool.
+ const Tool *T = TC.SelectTool(*BJ);
if (!T)
return nullptr;
@@ -4236,6 +4256,13 @@ InputInfo Driver::BuildJobsForActionNoCache(
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
/*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
!AtTopLevel);
+ if (isa<OffloadWrapperJobAction>(JA)) {
+ OffloadingPrefix += "-wrapper";
+ if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
+ BaseInput = FinalOutput->getValue();
+ else
+ BaseInput = getDefaultImageName();
+ }
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
AtTopLevel, MultipleArchs,
OffloadingPrefix),
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index db2497a1085..357a5106ab3 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -292,6 +292,12 @@ Tool *ToolChain::getOffloadBundler() const {
return OffloadBundler.get();
}
+Tool *ToolChain::getOffloadWrapper() const {
+ if (!OffloadWrapper)
+ OffloadWrapper.reset(new tools::OffloadWrapper(*this));
+ return OffloadWrapper.get();
+}
+
Tool *ToolChain::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::AssembleJobClass:
@@ -324,6 +330,9 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
case Action::OffloadBundlingJobClass:
case Action::OffloadUnbundlingJobClass:
return getOffloadBundler();
+
+ case Action::OffloadWrapperJobClass:
+ return getOffloadWrapper();
}
llvm_unreachable("Invalid tool kind.");
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 98e8c57235a..22b830ee838 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6463,3 +6463,57 @@ void OffloadBundler::ConstructJobMultipleOutputs(
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, None));
}
+
+void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ ArgStringList CmdArgs;
+
+ const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
+
+ // Add the "effective" target triple.
+ CmdArgs.push_back("-target");
+ CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
+
+ assert(JA.getInputs().size() == Inputs.size() &&
+ "Not have inputs for all dependence actions??");
+
+ // Add offload targets. It is a comma-separated list of offload target
+ // triples.
+ SmallString<128> Targets;
+ Targets += "-offload-targets=";
+ for (unsigned I = 0; I < Inputs.size(); ++I) {
+ if (I)
+ Targets += ',';
+
+ // Get input's Offload Kind and ToolChain.
+ const auto *OA = cast<OffloadAction>(JA.getInputs()[I]);
+ assert(OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) &&
+ "Expected one device dependence!");
+ const ToolChain *DeviceTC = nullptr;
+ OA->doOnEachDependence([&DeviceTC](Action *, const ToolChain *TC,
+ const char *) { DeviceTC = TC; });
+
+ // And add it to the offload targets.
+ Targets += DeviceTC->getTriple().normalize();
+ }
+ CmdArgs.push_back(Args.MakeArgString(Targets));
+
+ // Add the output file name.
+ assert(Output.isFilename() && "Invalid output.");
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Output.getFilename());
+
+ // Add inputs.
+ for (const InputInfo &I : Inputs) {
+ assert(I.isFilename() && "Invalid input.");
+ CmdArgs.push_back(I.getFilename());
+ }
+
+ C.addCommand(std::make_unique<Command>(
+ JA, *this,
+ Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
+ CmdArgs, Inputs));
+}
diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h
index 8b6ac43ebd5..b345c02489d 100644
--- a/clang/lib/Driver/ToolChains/Clang.h
+++ b/clang/lib/Driver/ToolChains/Clang.h
@@ -152,6 +152,20 @@ public:
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
+
+/// Offload wrapper tool.
+class LLVM_LIBRARY_VISIBILITY OffloadWrapper final : public Tool {
+public:
+ OffloadWrapper(const ToolChain &TC)
+ : Tool("offload wrapper", "clang-offload-wrapper", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; }
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
} // end namespace tools
} // end namespace driver
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 47964099bd9..159b42a6427 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1244,120 +1244,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
}
}
-/// Add OpenMP linker script arguments at the end of the argument list so that
-/// the fat binary is built by embedding each of the device images into the
-/// host. The linker script also defines a few symbols required by the code
-/// generation so that the images can be easily retrieved at runtime by the
-/// offloading library. This should be used only in tool chains that support
-/// linker scripts.
-void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
- const InputInfo &Output,
- const InputInfoList &Inputs,
- const ArgList &Args, ArgStringList &CmdArgs,
- const JobAction &JA) {
-
- // If this is not an OpenMP host toolchain, we don't need to do anything.
- if (!JA.isHostOffloading(Action::OFK_OpenMP))
- return;
-
- // Create temporary linker script. Keep it if save-temps is enabled.
- const char *LKS;
- SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
- if (C.getDriver().isSaveTempsEnabled()) {
- llvm::sys::path::replace_extension(Name, "lk");
- LKS = C.getArgs().MakeArgString(Name.c_str());
- } else {
- llvm::sys::path::replace_extension(Name, "");
- Name = C.getDriver().GetTemporaryPath(Name, "lk");
- LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
- }
-
- // Add linker script option to the command.
- CmdArgs.push_back("-T");
- CmdArgs.push_back(LKS);
-
- // Create a buffer to write the contents of the linker script.
- std::string LksBuffer;
- llvm::raw_string_ostream LksStream(LksBuffer);
-
- // Get the OpenMP offload tool chains so that we can extract the triple
- // associated with each device input.
- auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>();
- assert(OpenMPToolChains.first != OpenMPToolChains.second &&
- "No OpenMP toolchains??");
-
- // Track the input file name and device triple in order to build the script,
- // inserting binaries in the designated sections.
- SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo;
-
- // Add commands to embed target binaries. We ensure that each section and
- // image is 16-byte aligned. This is not mandatory, but increases the
- // likelihood of data to be aligned with a cache block in several main host
- // machines.
- LksStream << "/*\n";
- LksStream << " OpenMP Offload Linker Script\n";
- LksStream << " *** Automatically generated by Clang ***\n";
- LksStream << "*/\n";
- LksStream << "TARGET(binary)\n";
- auto DTC = OpenMPToolChains.first;
- for (auto &II : Inputs) {
- const Action *A = II.getAction();
- // Is this a device linking action?
- if (A && isa<LinkJobAction>(A) &&
- A->isDeviceOffloading(Action::OFK_OpenMP)) {
- assert(DTC != OpenMPToolChains.second &&
- "More device inputs than device toolchains??");
- InputBinaryInfo.push_back(std::make_pair(
- DTC->second->getTriple().normalize(), II.getFilename()));
- ++DTC;
- LksStream << "INPUT(" << II.getFilename() << ")\n";
- }
- }
-
- assert(DTC == OpenMPToolChains.second &&
- "Less device inputs than device toolchains??");
-
- LksStream << "SECTIONS\n";
- LksStream << "{\n";
-
- // Put each target binary into a separate section.
- for (const auto &BI : InputBinaryInfo) {
- LksStream << " .omp_offloading." << BI.first << " :\n";
- LksStream << " ALIGN(0x10)\n";
- LksStream << " {\n";
- LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
- << " = .);\n";
- LksStream << " " << BI.second << "\n";
- LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
- << " = .);\n";
- LksStream << " }\n";
- }
-
- LksStream << "}\n";
- LksStream << "INSERT BEFORE .data\n";
- LksStream.flush();
-
- // Dump the contents of the linker script if the user requested that. We
- // support this option to enable testing of behavior with -###.
- if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script))
- llvm::errs() << LksBuffer;
-
- // If this is a dry run, do not create the linker script file.
- if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
- return;
-
- // Open script file and write the contents.
- std::error_code EC;
- llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);
-
- if (EC) {
- C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
- return;
- }
-
- Lksf << LksBuffer;
-}
-
/// Add HIP linker script arguments at the end of the argument list so that
/// the fat binary is built by embedding the device images into the host. The
/// linker script also defines a symbol required by the code generation so that
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index 1aff07ab8fe..63359d77363 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -45,13 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
llvm::opt::ArgStringList &CmdArgs,
const llvm::opt::ArgList &Args);
-void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
- const InputInfo &Output,
- const InputInfoList &Inputs,
- const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs,
- const JobAction &JA);
-
void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 4cada743b88..8c704a3078a 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -563,8 +563,6 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(CubinF);
}
- AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index c7abe5c69a1..c302a31cd2e 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -499,7 +499,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
P = ToolChain.GetFilePath(crtbegin);
}
CmdArgs.push_back(Args.MakeArgString(P));
- }
+ }
// Add crtfastmath.o if available and fast math is enabled.
ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
@@ -623,9 +623,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- // Add OpenMP offloading linker script args if required.
- AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
// Add HIP offloading linker script args if required.
AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
*this);
OpenPOWER on IntegriCloud