summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/docs/ClangCommandLineReference.rst4
-rw-r--r--clang/include/clang/Driver/Options.td4
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp19
-rw-r--r--clang/test/Driver/cuda-options.cu51
4 files changed, 78 insertions, 0 deletions
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index 8e9ef84c99c..67eb56a4cef 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -144,6 +144,10 @@ Compile CUDA code for device only
CUDA GPU architecture (e.g. sm\_35). May be specified more than once.
+.. option:: --cuda-include-ptx=<arg>, --no-cuda-include-ptx=<arg>
+
+Include (or not) PTX along with CUDA GPU binary for the given architecture (e.g. sm\_35). Argument may be 'all'. The option may be specified more than once. Default: --cuda-include-ptx=all
+
.. option:: --cuda-host-only
Compile CUDA code for host only. Has no effect on non-CUDA compilations.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 58b5341b348..a08003b75a0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -546,6 +546,10 @@ def cuda_host_only : Flag<["--"], "cuda-host-only">,
def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
HelpText<"Compile CUDA code for both host and device (default). Has no "
"effect on non-CUDA compilations.">;
+def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[DriverOption]>,
+ HelpText<"Include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
+def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[DriverOption]>,
+ HelpText<"Do not include PTX for the follwing GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
HelpText<"CUDA GPU architecture (e.g. sm_35). May be specified more than once.">;
def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, Flags<[DriverOption]>,
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index f383e017234..c83d066fa26 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -377,6 +377,22 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
+static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
+ bool includePTX = true;
+ for (Arg *A : Args) {
+ if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
+ A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
+ continue;
+ A->claim();
+ const StringRef ArchStr = A->getValue();
+ if (ArchStr == "all" || ArchStr == gpu_arch) {
+ includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
+ continue;
+ }
+ }
+ return includePTX;
+}
+
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
@@ -404,6 +420,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
"Device action expected to have associated a GPU architecture!");
CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+ if (II.getType() == types::TY_PP_Asm &&
+ !shouldIncludePTX(Args, gpu_arch_str))
+ continue;
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
const char *Arch =
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 68a35391d0b..4ffab317d16 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -142,6 +142,48 @@
// RUN: -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCHALLERROR %s
+
+// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
+// a) by default we're including PTX for all GPUs.
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-include-ptx=all \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
+
+// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-include-ptx=sm_35 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-include-ptx=sm_30 \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
+
+// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
+// RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
+// RUN: -c %s 2>&1 \
+// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
+
+
// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
@@ -236,3 +278,12 @@
// Match no linker.
// NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
+
+// FATBIN-COMMON:fatbinary
+// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
+// FATBIN-COMMON: "--image=profile=sm_30,file=
+// PTX-SM30: "--image=profile=compute_30,file=
+// NOPTX-SM30-NOT: "--image=profile=compute_30,file=
+// FATBIN-COMMON: "--image=profile=sm_35,file=
+// PTX-SM35: "--image=profile=compute_35,file=
+// NOPTX-SM35-NOT: "--image=profile=compute_35,file=
OpenPOWER on IntegriCloud