summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Artem Belevich <tra@google.com> 2018-03-21 22:22:59 +0000
committer Artem Belevich <tra@google.com> 2018-03-21 22:22:59 +0000
commit ecb178bb356fc058a51bbda0d083068b5d83ba2b (patch)
tree 8f1e67971ee6bb3575f461000f388531c7a92a8e
parent 1dce44e8e82c09822e843ccd1df3c6c5de7839f4 (diff)
download bcm5719-llvm-ecb178bb356fc058a51bbda0d083068b5d83ba2b.tar.gz
bcm5719-llvm-ecb178bb356fc058a51bbda0d083068b5d83ba2b.zip
[CUDA] Disable LTO for device-side compilations.
This fixes host-side LTO during CUDA compilation. Before, LTO pipeline construction was clashing with CUDA pipeline construction.

At the moment there's no point doing LTO on the device side, as each device-side TU is a complete program. We will need to figure out compilation pipeline construction for the device-side LTO when we have working support for multi-TU device-side CUDA compilation.

Differential Revision: https://reviews.llvm.org/D44691

llvm-svn: 328161
-rw-r--r-- clang/include/clang/Driver/Driver.h 6
-rw-r--r-- clang/lib/Driver/Driver.cpp 9
-rw-r--r-- clang/lib/Driver/ToolChains/Clang.cpp 6
-rw-r--r-- clang/test/Driver/lto.cu 76
-rw-r--r-- clang/test/Driver/thinlto.cu 50
5 files changed, 140 insertions, 7 deletions
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index 0e55dab25f5..ca2ca30868d 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -456,8 +456,10 @@ public:
/// ConstructAction - Construct the appropriate action to do for
/// \p Phase on the \p Input, taking in to account arguments
/// like -fsyntax-only or --analyze.
- Action *ConstructPhaseAction(Compilation &C, const llvm::opt::ArgList &Args,
- phases::ID Phase, Action *Input) const;
+ Action *ConstructPhaseAction(
+ Compilation &C, const llvm::opt::ArgList &Args, phases::ID Phase,
+ Action *Input,
+ Action::OffloadKind TargetDeviceOffloadKind = Action::OFK_None) const;
/// BuildJobsForAction - Construct the jobs to perform for the action \p A and
/// return an InputInfo for the result of running \p A. Will only construct
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 4712daf0e19..ad65f6f61e5 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2171,7 +2171,7 @@ class OffloadingActionBuilder final {
break;
CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
- C, Args, Ph, CudaDeviceActions[I]);
+ C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
if (Ph == phases::Assemble)
break;
@@ -3011,8 +3011,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
-Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
- phases::ID Phase, Action *Input) const {
+Action *Driver::ConstructPhaseAction(
+ Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
+ Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
@@ -3074,7 +3075,7 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
- if (isUsingLTO()) {
+ if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a1feb3a667c..a86c5376e4c 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3249,7 +3249,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (JA.getType() == types::TY_LLVM_BC)
CmdArgs.push_back("-emit-llvm-uselists");
- if (D.isUsingLTO()) {
+ // Device-side jobs do not support LTO.
+ bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
+ JA.isDeviceOffloading(Action::OFK_Host));
+
+ if (D.isUsingLTO() && !isDeviceOffloadAction) {
Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
// The Darwin and PS4 linkers currently use the legacy LTO API, which
diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu
new file mode 100644
index 00000000000..40c99883835
--- /dev/null
+++ b/clang/test/Driver/lto.cu
@@ -0,0 +1,76 @@
+// -flto causes a switch to llvm-bc object files.
+// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases -c %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-ACTIONS < %t %s
+//
+// CHECK-COMPILE-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILE-ACTIONS-NOT: lto-bc
+// CHECK-COMPILE-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+
+// RUN: %clangxx -nocudainc -nocudalib -ccc-print-phases %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-ACTIONS < %t %s
+//
+// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}lto.cu", cuda, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
+// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}lto.cu", cuda, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
+// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 13: linker, {12}, image, (host-cuda)
+
+// llvm-bc and llvm-ll outputs need to match regular suffixes
+// (unfortunately).
+// RUN: %clangxx %s -nocudainc -nocudalib -flto -save-temps -### 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
+//
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[BC:.*lto-host.*\.bc]]" {{.*}}[[CPP]]"
+// CHECK-COMPILELINK-SUFFIXES: "-o" "[[OBJ:.*lto-host.*\.o]]" {{.*}}[[BC]]"
+// CHECK-COMPILELINK-SUFFIXES: "{{.*}}a.{{(out|exe)}}" {{.*}}[[OBJ]]"
+
+// RUN: %clangxx %s -nocudainc -nocudalib -flto -S -### 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-SUFFIXES < %t %s
+//
+// CHECK-COMPILE-SUFFIXES: "-o" "{{.*}}lto.s" "-x" "cuda" "{{.*}}lto.cu"
+
+// RUN: not %clangxx -nocudainc -nocudalib %s -emit-llvm 2>&1 \
+// RUN: | FileCheck --check-prefix=LLVM-LINK %s
+// LLVM-LINK: -emit-llvm cannot be used when linking
+
+// -flto should cause link using gold plugin
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-LTO-ACTION < %t %s
+//
+// CHECK-LINK-LTO-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// -flto=full should cause link using gold plugin
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto=full 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-FULL-ACTION < %t %s
+//
+// CHECK-LINK-FULL-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// Check that subsequent -fno-lto takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto=full -fno-lto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-NOLTO-ACTION < %t %s
+//
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+
+// -flto passes along an explicit debugger tuning argument.
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto -glldb 2> %t
+// RUN: FileCheck -check-prefix=CHECK-TUNING-LLDB < %t %s
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto -g 2> %t
+// RUN: FileCheck -check-prefix=CHECK-NO-TUNING < %t %s
+//
+// CHECK-TUNING-LLDB: "-plugin-opt=-debugger-tune=lldb"
+// CHECK-NO-TUNING-NOT: "-plugin-opt=-debugger-tune
diff --git a/clang/test/Driver/thinlto.cu b/clang/test/Driver/thinlto.cu
new file mode 100644
index 00000000000..7a87015723f
--- /dev/null
+++ b/clang/test/Driver/thinlto.cu
@@ -0,0 +1,50 @@
+// -flto=thin causes a switch to llvm-bc object files.
+// RUN: %clangxx -ccc-print-phases -nocudainc -nocudalib -c %s -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILE-ACTIONS < %t %s
+//
+// CHECK-COMPILE-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILE-ACTIONS-NOT: lto-bc
+// CHECK-COMPILE-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+
+// RUN: %clangxx -ccc-print-phases -nocudainc -nocudalib %s -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-COMPILELINK-ACTIONS < %t %s
+//
+// CHECK-COMPILELINK-ACTIONS: 0: input, "{{.*}}thinlto.cu", cuda, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 1: preprocessor, {0}, cuda-cpp-output
+// CHECK-COMPILELINK-ACTIONS: 2: compiler, {1}, ir, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 3: input, "{{.*}}thinlto.cu", cuda, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHECK-COMPILELINK-ACTIONS: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHECK-COMPILELINK-ACTIONS: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHECK-COMPILELINK-ACTIONS: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHECK-COMPILELINK-ACTIONS: 11: offload, "host-cuda {{.*}}" {2}, "device-cuda{{.*}}" {10}, ir
+// CHECK-COMPILELINK-ACTIONS: 12: backend, {11}, lto-bc, (host-cuda)
+// CHECK-COMPILELINK-ACTIONS: 13: linker, {12}, image, (host-cuda)
+
+// -flto=thin should cause link using gold plugin with thinlto option,
+// also confirm that it takes precedence over earlier -fno-lto and -flto=full.
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto=full -fno-lto -flto=thin 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-THIN-ACTION < %t %s
+//
+// CHECK-LINK-THIN-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-THIN-ACTION: "-plugin-opt=thinlto"
+
+// Check that subsequent -flto=full takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto=thin -flto=full 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-FULL-ACTION < %t %s
+//
+// CHECK-LINK-FULL-ACTION: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-FULL-ACTION-NOT: "-plugin-opt=thinlto"
+
+// Check that subsequent -fno-lto takes precedence
+// RUN: %clangxx -nocudainc -nocudalib \
+// RUN: -target x86_64-unknown-linux -### %s -flto=thin -fno-lto 2> %t
+// RUN: FileCheck -check-prefix=CHECK-LINK-NOLTO-ACTION < %t %s
+//
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin" "{{.*}}{{[/\\]}}LLVMgold.{{dll|dylib|so}}"
+// CHECK-LINK-NOLTO-ACTION-NOT: "-plugin-opt=thinlto"
OpenPOWER on IntegriCloud