summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Driver/ToolChain.h6
-rw-r--r--clang/lib/Driver/ToolChain.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp23
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp26
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.h4
-rw-r--r--clang/test/Driver/openmp-offload-gpu.c79
6 files changed, 99 insertions, 43 deletions
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 943ec4a5c98..1206794e93d 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -40,6 +40,7 @@ namespace driver {
class Compilation;
class CudaInstallationDetector;
class Driver;
+ class InputInfo;
class JobAction;
class RegisterEffectiveTriple;
class SanitizerArgs;
@@ -172,6 +173,11 @@ public:
/// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu.
virtual const llvm::Triple *getAuxTriple() const { return nullptr; }
+ /// Some toolchains need to modify the file name, for example to replace the
+ /// extension for object files with .cubin for OpenMP offloading to Nvidia
+ /// GPUs.
+ virtual std::string getInputFilename(const InputInfo &Input) const;
+
llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
StringRef getArchName() const { return Triple.getArchName(); }
StringRef getPlatform() const { return Triple.getVendorName(); }
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 89c996ca5a1..fac0a5bbff5 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -215,6 +215,10 @@ StringRef ToolChain::getDefaultUniversalArchName() const {
}
}
+std::string ToolChain::getInputFilename(const InputInfo &Input) const {
+ return Input.getFilename();
+}
+
bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
return false;
}
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index eae86c05729..2f1fa4af807 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5310,12 +5310,15 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
if (I)
Triples += ',';
+ // Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
+ CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
@@ -5336,7 +5339,17 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
- UB += Inputs[I].getFilename();
+
+ // Find ToolChain for this input.
+ const ToolChain *CurTC = &getToolChain();
+ if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
+ CurTC = nullptr;
+ OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
+ CurTC = TC;
+ });
+ }
+ UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
@@ -5396,13 +5409,7 @@ void OffloadBundler::ConstructJobMultipleOutputs(
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
- SmallString<256> OutputFileName(Outputs[I].getFilename());
- // Change extension of target files for OpenMP offloading
- // to NVIDIA GPUs.
- if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
- JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- UB += OutputFileName;
+ UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 44ec16e8b86..4a5cf3dc6ae 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -301,10 +301,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- SmallString<256> OutputFileName(Output.getFilename());
- if (JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- CmdArgs.push_back(Args.MakeArgString(OutputFileName));
+ CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
@@ -431,11 +428,8 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
if (!II.isFilename())
continue;
- SmallString<256> Name(II.getFilename());
- llvm::sys::path::replace_extension(Name, "cubin");
-
- const char *CubinF =
- C.addTempFile(C.getArgs().MakeArgString(Name));
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
CmdArgs.push_back(CubinF);
}
@@ -463,6 +457,20 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
getProgramPaths().push_back(getDriver().Dir);
}
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+ // Only object files are changed, for example assembly files keep their .s
+ // extensions. CUDA also continues to use .o as they don't use nvlink but
+ // fatbinary.
+ if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
+ return ToolChain::getInputFilename(Input);
+
+ // Replace extension for object files with cubin because nvlink relies on
+ // these particular file names.
+ SmallString<256> Filename(ToolChain::getInputFilename(Input));
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ return Filename.str();
+}
+
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 23cca03eb20..3d08cec1643 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -137,10 +137,12 @@ public:
const ToolChain &HostTC, const llvm::opt::ArgList &Args,
const Action::OffloadKind OK);
- virtual const llvm::Triple *getAuxTriple() const override {
+ const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
+ std::string getInputFilename(const InputInfo &Input) const override;
+
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index abef552a6e6..ad53f96112a 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -10,7 +10,8 @@
/// ###########################################################################
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
@@ -19,7 +20,9 @@
/// ###########################################################################
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp \
+// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
@@ -28,43 +31,67 @@
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+
+// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
+
+/// ###########################################################################
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin"
+/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
+// RUN: touch %t.s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
+/// Use DAG to ensure that assembly file has been unbundled.
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
+/// Check cubin file generation and bundling
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s -c 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin"
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file unbundling and usage by nvlink
+// RUN: touch %t.o
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
-// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+/// Use DAG to ensure that cubin file has been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+/// Check cubin file generation and usage by nvlink
// RUN: touch %t1.o
// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
/// ###########################################################################
@@ -77,7 +104,8 @@
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
@@ -86,7 +114,8 @@
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
OpenPOWER on IntegriCloud