Diffstat (limited to 'clang/lib/Driver/Driver.cpp')
-rw-r--r-- clang/lib/Driver/Driver.cpp | 332
1 file changed, 218 insertions(+), 114 deletions(-)
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 6dfd30c55cb..e48dac26bb6 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2151,9 +2151,10 @@ class OffloadingActionBuilder final {
}
};
- /// CUDA action builder. It injects device code in the host backend
- /// action.
- class CudaActionBuilder final : public DeviceActionBuilder {
+ /// Base class for CUDA/HIP action builder. It injects device code in
+ /// the host backend action.
+ class CudaActionBuilderBase : public DeviceActionBuilder {
+ protected:
/// Flags to signal if the user requested host-only or device-only
/// compilation.
bool CompileHostOnly = false;
@@ -2170,115 +2171,11 @@ class OffloadingActionBuilder final {
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
-
public:
- CudaActionBuilder(Compilation &C, DerivedArgList &Args,
- const Driver::InputList &Inputs)
- : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
-
- ActionBuilderReturnCode
- getDeviceDependences(OffloadAction::DeviceDependences &DA,
- phases::ID CurPhase, phases::ID FinalPhase,
- PhasesTy &Phases) override {
- if (!IsActive)
- return ABRT_Inactive;
-
- // If we don't have more CUDA actions, we don't have any dependences to
- // create for the host.
- if (CudaDeviceActions.empty())
- return ABRT_Success;
-
- assert(CudaDeviceActions.size() == GpuArchList.size() &&
- "Expecting one action per GPU architecture.");
- assert(!CompileHostOnly &&
- "Not expecting CUDA actions in host-only compilation.");
-
- // If we are generating code for the device or we are in a backend phase,
- // we attempt to generate the fat binary. We compile each arch to ptx and
- // assemble to cubin, then feed the cubin *and* the ptx into a device
- // "link" action, which uses fatbinary to combine these cubins into one
- // fatbin. The fatbin is then an input to the host action if not in
- // device-only mode.
- if (CompileDeviceOnly || CurPhase == phases::Backend) {
- ActionList DeviceActions;
- for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
- // Produce the device action from the current phase up to the assemble
- // phase.
- for (auto Ph : Phases) {
- // Skip the phases that were already dealt with.
- if (Ph < CurPhase)
- continue;
- // We have to be consistent with the host final phase.
- if (Ph > FinalPhase)
- break;
-
- CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
- C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
-
- if (Ph == phases::Assemble)
- break;
- }
-
- // If we didn't reach the assemble phase, we can't generate the fat
- // binary. We don't need to generate the fat binary if we are not in
- // device-only mode.
- if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
- CompileDeviceOnly)
- continue;
-
- Action *AssembleAction = CudaDeviceActions[I];
- assert(AssembleAction->getType() == types::TY_Object);
- assert(AssembleAction->getInputs().size() == 1);
-
- Action *BackendAction = AssembleAction->getInputs()[0];
- assert(BackendAction->getType() == types::TY_PP_Asm);
-
- for (auto &A : {AssembleAction, BackendAction}) {
- OffloadAction::DeviceDependences DDep;
- DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
- Action::OFK_Cuda);
- DeviceActions.push_back(
- C.MakeAction<OffloadAction>(DDep, A->getType()));
- }
- }
-
- // We generate the fat binary if we have device input actions.
- if (!DeviceActions.empty()) {
- CudaFatBinary =
- C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
-
- if (!CompileDeviceOnly) {
- DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
- Action::OFK_Cuda);
- // Clear the fat binary, it is already a dependence of a host
- // action.
- CudaFatBinary = nullptr;
- }
-
- // Remove the CUDA actions as they are already connected to a host
- // action or fat binary.
- CudaDeviceActions.clear();
- }
-
- // We avoid creating a host action in device-only mode.
- return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
- } else if (CurPhase > phases::Backend) {
- // If we are past the backend phase and still have a device action, we
- // don't have to do anything as this action is already a device
- // top-level action.
- return ABRT_Success;
- }
-
- assert(CurPhase < phases::Backend && "Generating single CUDA "
- "instructions should only occur "
- "before the backend phase!");
-
- // By default, we produce an action for each device arch.
- for (Action *&A : CudaDeviceActions)
- A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
-
- return ABRT_Success;
- }
+ CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
+ const Driver::InputList &Inputs,
+ Action::OffloadKind OFKind)
+ : DeviceActionBuilder(C, Args, Inputs, OFKind) {}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// While generating code for CUDA, we only depend on the host input action
@@ -2317,6 +2214,17 @@ class OffloadingActionBuilder final {
return ABRT_Success;
}
+ // If this is an unbundling action, use it as-is for each CUDA toolchain.
+ if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
+ CudaDeviceActions.clear();
+ for (auto Arch : GpuArchList) {
+ CudaDeviceActions.push_back(UA);
+ UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
+ AssociatedOffloadKind);
+ }
+ return ABRT_Success;
+ }
+
return IsActive ? ABRT_Success : ABRT_Inactive;
}
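The unbundling case above reuses a single OffloadUnbundlingJobAction for every GPU arch, recording one (toolchain, arch, offload-kind) triple per arch so the correct sub-outputs can be routed later. A minimal standalone sketch of that registration pattern, using simplified stand-in types rather than clang's real Action and ToolChain classes:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for clang's real Action/ToolChain types.
struct DependentInfo {
  std::string ToolChainName;
  std::string BoundArch;
  std::string OffloadKind;
};

struct UnbundlingAction {
  std::vector<DependentInfo> Dependents;
  // One unbundling action is shared across archs; each call records a
  // (toolchain, arch, kind) triple so its outputs can be routed later.
  void registerDependentActionInfo(const std::string &TC,
                                   const std::string &Arch,
                                   const std::string &Kind) {
    Dependents.push_back({TC, Arch, Kind});
  }
};

int main() {
  UnbundlingAction UA;
  const std::vector<std::string> GpuArchList = {"gfx803", "gfx900"};
  for (const auto &Arch : GpuArchList)
    UA.registerDependentActionInfo("amdgcn-toolchain", Arch, "HIP");
  for (const auto &D : UA.Dependents)
    std::cout << D.ToolChainName << ' ' << D.BoundArch << ' '
              << D.OffloadKind << '\n';
}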
@@ -2325,7 +2233,7 @@ class OffloadingActionBuilder final {
auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
- Action::OFK_Cuda);
+ AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
@@ -2354,8 +2262,17 @@ class OffloadingActionBuilder final {
}
bool initialize() override {
+ assert(AssociatedOffloadKind == Action::OFK_Cuda ||
+ AssociatedOffloadKind == Action::OFK_HIP);
+
// We don't need to support CUDA.
- if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
+ if (AssociatedOffloadKind == Action::OFK_Cuda &&
+ !C.hasOffloadToolChain<Action::OFK_Cuda>())
+ return false;
+
+ // We don't need to support HIP.
+ if (AssociatedOffloadKind == Action::OFK_HIP &&
+ !C.hasOffloadToolChain<Action::OFK_HIP>())
return false;
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
@@ -2370,7 +2287,10 @@ class OffloadingActionBuilder final {
return true;
}
- ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
+ ToolChains.push_back(
+ AssociatedOffloadKind == Action::OFK_Cuda
+ ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
+ : C.getSingleOffloadToolChain<Action::OFK_HIP>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
@@ -2423,6 +2343,187 @@ class OffloadingActionBuilder final {
}
};
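The initialize() changes above make the base class bail out early when the compilation has no toolchain for the builder's associated offload kind, and otherwise select the matching toolchain. A rough sketch of that kind-based dispatch, with hypothetical Compilation/OffloadKind stand-ins in place of the driver's real types:

#include <cassert>

// Hypothetical stand-ins; the real driver uses Action::OffloadKind and
// Compilation::hasOffloadToolChain<Kind>().
enum class OffloadKind { Cuda, HIP };

struct Compilation {
  bool HasCudaToolChain = false;
  bool HasHIPToolChain = false;
  bool hasToolChainFor(OffloadKind K) const {
    return K == OffloadKind::Cuda ? HasCudaToolChain : HasHIPToolChain;
  }
};

// Mirrors the early-exit in initialize(): a builder only activates when
// the compilation actually has a toolchain for its offload kind.
bool initializeBuilder(const Compilation &C, OffloadKind Kind) {
  assert(Kind == OffloadKind::Cuda || Kind == OffloadKind::HIP);
  return C.hasToolChainFor(Kind);
}

int main() {
  Compilation C;
  C.HasHIPToolChain = true;
  assert(!initializeBuilder(C, OffloadKind::Cuda)); // CUDA builder stays inactive
  assert(initializeBuilder(C, OffloadKind::HIP));   // HIP builder proceeds
}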
+ /// \brief CUDA action builder. It injects device code in the host backend
+ /// action.
+ class CudaActionBuilder final : public CudaActionBuilderBase {
+ public:
+ CudaActionBuilder(Compilation &C, DerivedArgList &Args,
+ const Driver::InputList &Inputs)
+ : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+
+ ActionBuilderReturnCode
+ getDeviceDependences(OffloadAction::DeviceDependences &DA,
+ phases::ID CurPhase, phases::ID FinalPhase,
+ PhasesTy &Phases) override {
+ if (!IsActive)
+ return ABRT_Inactive;
+
+ // If we don't have more CUDA actions, we don't have any dependences to
+ // create for the host.
+ if (CudaDeviceActions.empty())
+ return ABRT_Success;
+
+ assert(CudaDeviceActions.size() == GpuArchList.size() &&
+ "Expecting one action per GPU architecture.");
+ assert(!CompileHostOnly &&
+ "Not expecting CUDA actions in host-only compilation.");
+
+ // If we are generating code for the device or we are in a backend phase,
+ // we attempt to generate the fat binary. We compile each arch to ptx and
+ // assemble to cubin, then feed the cubin *and* the ptx into a device
+ // "link" action, which uses fatbinary to combine these cubins into one
+ // fatbin. The fatbin is then an input to the host action if not in
+ // device-only mode.
+ if (CompileDeviceOnly || CurPhase == phases::Backend) {
+ ActionList DeviceActions;
+ for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+ // Produce the device action from the current phase up to the assemble
+ // phase.
+ for (auto Ph : Phases) {
+ // Skip the phases that were already dealt with.
+ if (Ph < CurPhase)
+ continue;
+ // We have to be consistent with the host final phase.
+ if (Ph > FinalPhase)
+ break;
+
+ CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
+ C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
+
+ if (Ph == phases::Assemble)
+ break;
+ }
+
+ // If we didn't reach the assemble phase, we can't generate the fat
+ // binary. We don't need to generate the fat binary if we are not in
+ // device-only mode.
+ if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
+ CompileDeviceOnly)
+ continue;
+
+ Action *AssembleAction = CudaDeviceActions[I];
+ assert(AssembleAction->getType() == types::TY_Object);
+ assert(AssembleAction->getInputs().size() == 1);
+
+ Action *BackendAction = AssembleAction->getInputs()[0];
+ assert(BackendAction->getType() == types::TY_PP_Asm);
+
+ for (auto &A : {AssembleAction, BackendAction}) {
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
+ Action::OFK_Cuda);
+ DeviceActions.push_back(
+ C.MakeAction<OffloadAction>(DDep, A->getType()));
+ }
+ }
+
+ // We generate the fat binary if we have device input actions.
+ if (!DeviceActions.empty()) {
+ CudaFatBinary =
+ C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
+
+ if (!CompileDeviceOnly) {
+ DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+ Action::OFK_Cuda);
+ // Clear the fat binary, it is already a dependence of a host
+ // action.
+ CudaFatBinary = nullptr;
+ }
+
+ // Remove the CUDA actions as they are already connected to a host
+ // action or fat binary.
+ CudaDeviceActions.clear();
+ }
+
+ // We avoid creating a host action in device-only mode.
+ return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
+ } else if (CurPhase > phases::Backend) {
+ // If we are past the backend phase and still have a device action, we
+ // don't have to do anything as this action is already a device
+ // top-level action.
+ return ABRT_Success;
+ }
+
+ assert(CurPhase < phases::Backend && "Generating single CUDA "
+ "instructions should only occur "
+ "before the backend phase!");
+
+ // By default, we produce an action for each device arch.
+ for (Action *&A : CudaDeviceActions)
+ A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+ return ABRT_Success;
+ }
+ };
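The comments in getDeviceDependences describe the CUDA device pipeline: each arch is driven through the backend (PTX) and assemble (cubin) phases, and both outputs feed the fatbinary link step, so the fatbin carries SASS for known archs plus JIT-able PTX. A simplified, self-contained sketch of that flow, with plain strings standing in for actions and invented arch names:

#include <iostream>
#include <string>
#include <vector>

// Simplified phase enum; the driver's phases::ID has more members.
enum class Phase { Preprocess, Compile, Backend, Assemble, Link };

// One device "action" per GPU arch, tracking the phase it has reached.
struct DeviceAction {
  std::string Arch;
  Phase Reached;
};

int main() {
  std::vector<DeviceAction> Actions = {{"sm_35", Phase::Compile},
                                       {"sm_70", Phase::Compile}};
  std::vector<std::string> FatbinInputs;

  for (auto &A : Actions) {
    // Walk each arch through the backend (PTX) and assemble (cubin)
    // phases, stopping at Assemble just as the phase loop above does.
    for (Phase Ph : {Phase::Backend, Phase::Assemble}) {
      A.Reached = Ph;
      if (Ph == Phase::Assemble)
        break;
    }
    // Both the cubin *and* the PTX feed the fatbinary step.
    FatbinInputs.push_back(A.Arch + ".cubin");
    FatbinInputs.push_back(A.Arch + ".ptx");
  }

  std::cout << "fatbinary <-";
  for (const auto &I : FatbinInputs)
    std::cout << ' ' << I;
  std::cout << '\n';
}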
+ /// \brief HIP action builder. It injects device code in the host backend
+ /// action.
+ class HIPActionBuilder final : public CudaActionBuilderBase {
+ /// The linker inputs obtained for each device arch.
+ SmallVector<ActionList, 8> DeviceLinkerInputs;
+
+ public:
+ HIPActionBuilder(Compilation &C, DerivedArgList &Args,
+ const Driver::InputList &Inputs)
+ : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+
+ bool canUseBundlerUnbundler() const override { return true; }
+
+ ActionBuilderReturnCode
+ getDeviceDependences(OffloadAction::DeviceDependences &DA,
+ phases::ID CurPhase, phases::ID FinalPhase,
+ PhasesTy &Phases) override {
+ // amdgcn does not support linking of object files, therefore we skip the
+ // backend and assemble phases and output LLVM IR instead.
+ if (CudaDeviceActions.empty() || CurPhase == phases::Backend ||
+ CurPhase == phases::Assemble)
+ return ABRT_Success;
+
+ assert((CurPhase == phases::Link ||
+ CudaDeviceActions.size() == GpuArchList.size()) &&
+ "Expecting one action per GPU architecture.");
+ assert(!CompileHostOnly &&
+ "Not expecting CUDA actions in host-only compilation.");
+
+ // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
+ // This happens for each device action originating from each input file.
+ // Later on, the device actions in DeviceLinkerInputs are used to create
+ // device link actions in appendLinkDependences, and those device link
+ // actions are passed to the offload action as device dependences.
+ if (CurPhase == phases::Link) {
+ DeviceLinkerInputs.resize(CudaDeviceActions.size());
+ auto LI = DeviceLinkerInputs.begin();
+ for (auto *A : CudaDeviceActions) {
+ LI->push_back(A);
+ ++LI;
+ }
+
+ // We will pass the device action as a host dependence, so we don't
+ // need to do anything else with them.
+ CudaDeviceActions.clear();
+ return ABRT_Success;
+ }
+
+ // By default, we produce an action for each device arch.
+ for (Action *&A : CudaDeviceActions)
+ A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
+ AssociatedOffloadKind);
+
+ return ABRT_Success;
+ }
+
+ void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+ // Append a new link action for each device.
+ unsigned I = 0;
+ for (auto &LI : DeviceLinkerInputs) {
+ auto *DeviceLinkAction =
+ C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+ DA.add(*DeviceLinkAction, *ToolChains[0],
+ CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+ ++I;
+ }
+ }
+ };
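The HIP builder defers linking: at the Link phase it moves the per-arch device actions into DeviceLinkerInputs (accumulated across all input files), and appendLinkDependences later emits one device link action per arch. A standalone sketch of that bookkeeping, with invented file names standing in for actions:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Two archs, two inputs: amdgcn links LLVM IR rather than objects, so
  // each input contributes one IR "action" per arch (names invented).
  const std::vector<std::string> GpuArchList = {"gfx803", "gfx900"};
  const std::vector<std::string> Inputs = {"a.hip", "b.hip"};

  // DeviceLinkerInputs[I] accumulates the inputs for GpuArchList[I],
  // as the Link-phase branch above does with CudaDeviceActions.
  std::vector<std::vector<std::string>> DeviceLinkerInputs(
      GpuArchList.size());
  for (const auto &In : Inputs)
    for (std::size_t I = 0; I != GpuArchList.size(); ++I)
      DeviceLinkerInputs[I].push_back(In + "." + GpuArchList[I] + ".bc");

  // appendLinkDependences: one device link action per GPU arch.
  for (std::size_t I = 0; I != GpuArchList.size(); ++I) {
    std::cout << "link " << GpuArchList[I] << ':';
    for (const auto &LI : DeviceLinkerInputs[I])
      std::cout << ' ' << LI;
    std::cout << '\n';
  }
}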
+
/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
class OpenMPActionBuilder final : public DeviceActionBuilder {
@@ -2589,6 +2690,9 @@ public:
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
+ // Create a specialized builder for HIP.
+ SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs));
+
// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
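The registration at the end follows a simple strategy pattern: OffloadingActionBuilder owns one specialized builder per programming model and iterates over them generically, with each builder's initialize() deciding whether it applies. A minimal sketch with stub classes, not the actual clang hierarchy:

#include <iostream>
#include <memory>
#include <vector>

// Stub hierarchy; the real DeviceActionBuilder interface is larger.
struct DeviceActionBuilder {
  virtual ~DeviceActionBuilder() = default;
  virtual const char *name() const = 0;
};
struct CudaActionBuilder final : DeviceActionBuilder {
  const char *name() const override { return "CUDA"; }
};
struct HIPActionBuilder final : DeviceActionBuilder {
  const char *name() const override { return "HIP"; }
};
struct OpenMPActionBuilder final : DeviceActionBuilder {
  const char *name() const override { return "OpenMP"; }
};

int main() {
  // One specialized builder per programming model, iterated generically.
  std::vector<std::unique_ptr<DeviceActionBuilder>> SpecializedBuilders;
  SpecializedBuilders.push_back(std::make_unique<CudaActionBuilder>());
  SpecializedBuilders.push_back(std::make_unique<HIPActionBuilder>());
  SpecializedBuilders.push_back(std::make_unique<OpenMPActionBuilder>());
  for (const auto &B : SpecializedBuilders)
    std::cout << B->name() << '\n';
}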