diff options
| author | Jonas Hahnfeld <hahnjo@hahnjo.de> | 2017-10-17 13:37:36 +0000 |
|---|---|---|
| committer | Jonas Hahnfeld <hahnjo@hahnjo.de> | 2017-10-17 13:37:36 +0000 |
| commit | 30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7 (patch) | |
| tree | 4dd7d460770a8edc8cc3098c35a3ac576d9faf9a /clang | |
| parent | da0183947f9f4ca5a642a4abf2bfeb2a99e7430f (diff) | |
| download | bcm5719-llvm-30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7.tar.gz bcm5719-llvm-30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7.zip | |
[CMake][OpenMP] Customize default offloading arch
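The shuffle instructions used to generate reductions require at least sm_30,
so sm_30 becomes the default OpenMP offloading architecture. The user may still
customize this default at configure time via CLANG_OPENMP_NVPTX_DEFAULT_ARCH.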
Differential Revision: https://reviews.llvm.org/D38883
llvm-svn: 315996
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | clang/include/clang/Config/config.h.cmake | 3 | ||||
| -rw-r--r-- | clang/lib/Driver/ToolChains/Cuda.cpp | 15 | ||||
| -rw-r--r-- | clang/lib/Driver/ToolChains/Cuda.h | 11 |
4 files changed, 19 insertions, 21 deletions
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index b55c64d9e05..42d580077d8 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -235,6 +235,17 @@ endif() set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING "Default OpenMP runtime used by -fopenmp.") +# OpenMP offloading requires at least sm_30 because we use shuffle instructions +# to generate efficient code for reductions. +set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs.") +string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}") +if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30) + message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30") + set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE) +endif() + set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING "Vendor-specific text for showing with version information.") diff --git a/clang/include/clang/Config/config.h.cmake b/clang/include/clang/Config/config.h.cmake index b138b5fcd82..3ee7258b3b3 100644 --- a/clang/include/clang/Config/config.h.cmake +++ b/clang/include/clang/Config/config.h.cmake @@ -20,6 +20,9 @@ /* Default OpenMP runtime used by -fopenmp. */ #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}" +/* Default architecture for OpenMP offloading to Nvidia GPUs. */ +#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}" + /* Multilib suffix for libdir. */ #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}" diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 4d040a204dd..4f740fc9525 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -542,9 +542,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, // flags are not duplicated. // Also append the compute capability. 
if (DeviceOffloadKind == Action::OFK_OpenMP) { - for (Arg *A : Args){ + for (Arg *A : Args) { bool IsDuplicate = false; - for (Arg *DALArg : *DAL){ + for (Arg *DALArg : *DAL) { if (A == DALArg) { IsDuplicate = true; break; @@ -555,14 +555,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); - if (Arch.empty()) { - // Default compute capability for CUDA toolchain is the - // lowest compute capability supported by the installed - // CUDA version. - DAL->AddJoinedArg(nullptr, - Opts.getOption(options::OPT_march_EQ), - CudaInstallation.getLowestExistingArch()); - } + if (Arch.empty()) + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + CLANG_OPENMP_NVPTX_DEFAULT_ARCH); return DAL; } diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 5144f5b40bb..1e30aa72702 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -76,17 +76,6 @@ public: std::string getLibDeviceFile(StringRef Gpu) const { return LibDeviceMap.lookup(Gpu); } - /// \brief Get lowest available compute capability - /// for which a libdevice library exists. - std::string getLowestExistingArch() const { - std::string LibDeviceFile; - for (auto key : LibDeviceMap.keys()) { - LibDeviceFile = LibDeviceMap.lookup(key); - if (!LibDeviceFile.empty()) - return key; - } - return "sm_20"; - } }; namespace tools { |

