diff options
-rw-r--r-- | clang/include/clang/Driver/Options.td | 2 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Cuda.cpp | 8 | ||||
-rw-r--r-- | clang/test/Driver/openmp-offload.c | 14 |
3 files changed, 21 insertions, 3 deletions
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c86941181b7..a303143850e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -544,6 +544,8 @@ def no_cuda_version_check : Flag<["--"], "no-cuda-version-check">, def no_cuda_noopt_device_debug : Flag<["--"], "no-cuda-noopt-device-debug">; def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>, HelpText<"CUDA installation path">; +def fopenmp_ptx_EQ : Joined<["--"], "fopenmp-ptx=">, Flags<[DriverOption]>, + HelpText<"Pass a PTX version +ptxXX, default +ptx42 (for PTX version 4.2) used by OpenMP device offloading.">; def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>, HelpText<"Path to ptxas (used for compiling CUDA code)">; def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 1ff41ce66d4..b6a7b829f50 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -481,7 +481,13 @@ void CudaToolChain::addClangTargetOptions( // than LLVM defaults to. Use PTX4.2 which is the PTX version that // came with CUDA-7.0. CC1Args.push_back("-target-feature"); - CC1Args.push_back("+ptx42"); + + if (DeviceOffloadingKind == Action::OFK_OpenMP) + CC1Args.push_back( + DriverArgs.getLastArgValue(options::OPT_fopenmp_ptx_EQ, + "+ptx42").data()); + else + CC1Args.push_back("+ptx42"); } void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index cd2ca3e3d6e..30dd1ea2ab0 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -699,7 +699,7 @@ /// ########################################################################### /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it. 
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -no-canonical-prefixes %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s // CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c" @@ -708,7 +708,17 @@ /// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP /// Check that the flag is passed when -fopenmp-relocatable-target is used. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -no-canonical-prefixes %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s // CHK-PTXAS-RELO: ptxas{{.*}}" "-c" + +/// ########################################################################### + +/// Check that the PTX version given to the driver via --fopenmp-ptx= is forwarded to the clang device invocations as a -target-feature. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --fopenmp-ptx=+ptx52 -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERSION %s + +// CHK-PTXAS-VERSION: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTXAS-VERSION-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" +// CHK-PTXAS-VERSION-NEXT: clang{{.*}}.bc" {{.*}}"-target-feature" "+ptx52" |