summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- clang/lib/Driver/Driver.cpp 20
-rw-r--r-- clang/test/Driver/cuda-phases.cu 10
2 files changed, 20 insertions, 10 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index c784fec1366..41c1da333ff 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2616,17 +2616,19 @@ class OffloadingActionBuilder final {
C.MakeAction<LinkJobAction>(CudaDeviceActions,
types::TY_HIP_FATBIN);
- DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
- AssociatedOffloadKind);
- // Clear the fat binary, it is already a dependence to an host
- // action.
- CudaFatBinary = nullptr;
+ if (!CompileDeviceOnly) {
+ DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+ AssociatedOffloadKind);
+ // Clear the fat binary; it is already a dependence of a host
+ // action.
+ CudaFatBinary = nullptr;
+ }
// Remove the CUDA actions as they are already connected to an host
// action or fat binary.
CudaDeviceActions.clear();
- return ABRT_Success;
+ return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase == phases::Link) {
// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
// This happens to each device action originated from each input file.
@@ -3014,8 +3016,10 @@ public:
}
// If we can use the bundler, replace the host action by the bundling one in
- // the resulting list. Otherwise, just append the device actions.
- if (CanUseBundler && !OffloadAL.empty()) {
+ // the resulting list. Otherwise, just append the device actions. For
+ // device-only compilation, HostAction is a null pointer, so only do
+ // this when HostAction is non-null.
+ if (CanUseBundler && HostAction && !OffloadAL.empty()) {
// Add the host action to the list in order to create the bundling action.
OffloadAL.push_back(HostAction);
diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu
index 2160d19f5c6..58be50ae2e1 100644
--- a/clang/test/Driver/cuda-phases.cu
+++ b/clang/test/Driver/cuda-phases.cu
@@ -157,6 +157,7 @@
// HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
// HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
// HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN-NOT: device
//
// Test single gpu architecture up to the assemble phase in host-only
// compilation mode.
@@ -172,6 +173,7 @@
// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM-NOT: device
//
// Test two gpu architectures with complete compilation in host-only
@@ -190,6 +192,7 @@
// HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
// HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
// HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN2-NOT: device
//
// Test two gpu architectures up to the assemble phase in host-only
@@ -206,6 +209,7 @@
// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
// HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM2-NOT: device
//
// Test single gpu architecture with complete compilation in device-only
@@ -224,7 +228,7 @@
// DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
// DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
// DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
-
+// DBIN-NOT: host
//
// Test single gpu architecture up to the assemble phase in device-only
// compilation mode.
@@ -241,6 +245,7 @@
// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
// DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
// DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
+// DASM-NOT: host
//
// Test two gpu architectures with complete compilation in device-only
@@ -265,7 +270,7 @@
// DBIN2_NV-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
// DBIN2_NV-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
// DBIN2_NV-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
-
+// DBIN2-NOT: host
//
// Test two gpu architectures up to the assemble phase in device-only
// compilation mode.
@@ -288,3 +293,4 @@
// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
// DASM2_NV-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
// DASM2_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
+// DASM2-NOT: host
OpenPOWER on IntegriCloud