summaryrefslogtreecommitdiffstats
path: root/clang/lib/Driver/ToolChains/Cuda.cpp
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2018-04-10 18:38:22 +0000
committerArtem Belevich <tra@google.com>2018-04-10 18:38:22 +0000
commitdde3dc27ee71f12eb145ce54158779ab4ddc38ed (patch)
treeeaccb2951afc4f620ca671f5b3cc153ef8909325 /clang/lib/Driver/ToolChains/Cuda.cpp
parent5da361a0b0f5e361c8285acdf9c9cd5d417416f9 (diff)
downloadbcm5719-llvm-dde3dc27ee71f12eb145ce54158779ab4ddc38ed.tar.gz
bcm5719-llvm-dde3dc27ee71f12eb145ce54158779ab4ddc38ed.zip
[CUDA] Added --[no-]cuda-include-ptx=sm_XX|all option.
Currently we always include PTX into the fatbin along with the GPU code. It about doubles the size of the GPU binary we need to carry in the executable. These options allow controlling the inclusion of PTX into the GPU binary. This patch does not change the defaults, though we may consider making no-PTX the default in the future. Differential Revision: https://reviews.llvm.org/D45495 llvm-svn: 329737
Diffstat (limited to 'clang/lib/Driver/ToolChains/Cuda.cpp')
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp19
1 files changed, 19 insertions, 0 deletions
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index f383e017234..c83d066fa26 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -377,6 +377,22 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
+// Returns true if PTX for gpu_arch should be included. PTX is included
+// unless a --[no-]cuda-include-ptx= argument says otherwise. Arguments are
+// visited in order, so the last one whose value is "all" or gpu_arch decides.
+// Every such argument is claimed, including those naming a different
+// architecture.
+static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
+  bool Include = true;
+  for (Arg *A : Args) {
+    const auto &Opt = A->getOption();
+    const bool Enables = Opt.matches(options::OPT_cuda_include_ptx_EQ);
+    const bool Disables = Opt.matches(options::OPT_no_cuda_include_ptx_EQ);
+    if (!Enables && !Disables)
+      continue;
+    A->claim();
+    const StringRef Requested = A->getValue();
+    if (Requested == "all" || Requested == gpu_arch)
+      Include = Enables;
+  }
+  return Include;
+}
+
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
@@ -404,6 +420,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
"Device action expected to have associated a GPU architecture!");
CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+ if (II.getType() == types::TY_PP_Asm &&
+ !shouldIncludePTX(Args, gpu_arch_str))
+ continue;
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
const char *Arch =
OpenPOWER on IntegriCloud