summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYaxun Liu <Yaxun.Liu@amd.com>2019-03-05 16:07:56 +0000
committerYaxun Liu <Yaxun.Liu@amd.com>2019-03-05 16:07:56 +0000
commitab851939fc6baf883a1760f18f48adc93ba2b01d (patch)
treec98f37b4e684f283a24c1e3d8e5e991bd4ec3108
parent130322e7cc58b7236fd2181126adedc4d90d2322 (diff)
downloadbcm5719-llvm-ab851939fc6baf883a1760f18f48adc93ba2b01d.tar.gz
bcm5719-llvm-ab851939fc6baf883a1760f18f48adc93ba2b01d.zip
[HIP] Do not unbundle object files for -fno-gpu-rdc
When -fno-gpu-rdc is set, device code is compiled, linked, and assembled into a fat binary and embedded as a string in the object files. These object files are normal object files that can be linked by the host linker. In the linking stage, the object files should not be unbundled when -fno-gpu-rdc is set, since they are normal object files, not bundles. The object files only need to be unbundled when -fgpu-rdc is set. Currently clang always unbundles object files, regardless of the -fgpu-rdc option. This patch fixes that. Differential Revision: https://reviews.llvm.org/D58917 llvm-svn: 355410
-rw-r--r--clang/lib/Driver/Driver.cpp23
-rw-r--r--clang/test/Driver/hip-binding.hip9
-rw-r--r--clang/test/Driver/hip-link-shared-library.hip2
3 files changed, 22 insertions, 12 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index eb03e6e87cc..de6a67bd6e1 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2293,6 +2293,9 @@ class OffloadingActionBuilder final {
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
+
+ /// Flag for -fgpu-rdc.
+ bool Relocatable = false;
public:
CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
@@ -2338,6 +2341,12 @@ class OffloadingActionBuilder final {
// If this is an unbundling action use it as is for each CUDA toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
+
+ // If -fgpu-rdc is disabled, should not unbundle since there is no
+ // device code to link.
+ if (!Relocatable)
+ return ABRT_Inactive;
+
CudaDeviceActions.clear();
auto *IA = cast<InputAction>(UA->getInputs().back());
std::string FileName = IA->getInputArg().getAsString(Args);
@@ -2409,6 +2418,9 @@ class OffloadingActionBuilder final {
!C.hasOffloadToolChain<Action::OFK_HIP>())
return false;
+ Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+ options::OPT_fno_gpu_rdc, /*Default=*/false);
+
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (HostTC->getTriple().isNVPTX() ||
@@ -2594,13 +2606,11 @@ class OffloadingActionBuilder final {
class HIPActionBuilder final : public CudaActionBuilderBase {
/// The linker inputs obtained for each device arch.
SmallVector<ActionList, 8> DeviceLinkerInputs;
- bool Relocatable;
public:
HIPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
- : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP),
- Relocatable(false) {}
+ : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
bool canUseBundlerUnbundler() const override { return true; }
@@ -2705,13 +2715,6 @@ class OffloadingActionBuilder final {
++I;
}
}
-
- bool initialize() override {
- Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
- options::OPT_fno_gpu_rdc, /*Default=*/false);
-
- return CudaActionBuilderBase::initialize();
- }
};
/// OpenMP action builder. The host bitcode is passed to the device frontend
diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip
index 5425bf651a7..d173edabc77 100644
--- a/clang/test/Driver/hip-binding.hip
+++ b/clang/test/Driver/hip-binding.hip
@@ -4,7 +4,7 @@
// RUN: touch %t.o
// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
-// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
// RUN: 2>&1 | FileCheck %s
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
@@ -13,3 +13,10 @@
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
// CHECK-NOT: offload bundler
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
+
+// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s
+
+// NORDC-NOT: offload bundler
+// NORDC: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["{{.*o}}"], output: "a.out"
diff --git a/clang/test/Driver/hip-link-shared-library.hip b/clang/test/Driver/hip-link-shared-library.hip
index b7b301a9e39..cb409d1a874 100644
--- a/clang/test/Driver/hip-link-shared-library.hip
+++ b/clang/test/Driver/hip-link-shared-library.hip
@@ -1,7 +1,7 @@
// RUN: touch %t.o
// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \
-// RUN: 2>&1 | FileCheck %s
+// RUN: -fgpu-rdc 2>&1 | FileCheck %s
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
OpenPOWER on IntegriCloud