summary | refs | log | tree | commit | diff | stats
path: root/clang
diff options
context:
space:
mode:
author: Jonas Hahnfeld <hahnjo@hahnjo.de> 2017-10-17 13:37:36 +0000
committer: Jonas Hahnfeld <hahnjo@hahnjo.de> 2017-10-17 13:37:36 +0000
commit: 30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7 (patch)
tree: 4dd7d460770a8edc8cc3098c35a3ac576d9faf9a /clang
parent: da0183947f9f4ca5a642a4abf2bfeb2a99e7430f (diff)
download: bcm5719-llvm-30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7.tar.gz
          bcm5719-llvm-30b4418e5ac6c7f4b6f88571c4fe0f2b7d6b37d7.zip
[CMake][OpenMP] Customize default offloading arch
For the shuffle instructions in reductions we need at least sm_30, but the user may want to customize the default architecture.

Differential Revision: https://reviews.llvm.org/D38883

llvm-svn: 315996
Diffstat (limited to 'clang')
-rw-r--r-- clang/CMakeLists.txt 11
-rw-r--r-- clang/include/clang/Config/config.h.cmake 3
-rw-r--r-- clang/lib/Driver/ToolChains/Cuda.cpp 15
-rw-r--r-- clang/lib/Driver/ToolChains/Cuda.h 11
4 files changed, 19 insertions, 21 deletions
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index b55c64d9e05..42d580077d8 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -235,6 +235,17 @@ endif()
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
"Default OpenMP runtime used by -fopenmp.")
+# OpenMP offloading requires at least sm_30 because we use shuffle instructions
+# to generate efficient code for reductions.
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+ "Default architecture for OpenMP offloading to Nvidia GPUs.")
+string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
+ message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
+ set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
+ "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+endif()
+
set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING
"Vendor-specific text for showing with version information.")
diff --git a/clang/include/clang/Config/config.h.cmake b/clang/include/clang/Config/config.h.cmake
index b138b5fcd82..3ee7258b3b3 100644
--- a/clang/include/clang/Config/config.h.cmake
+++ b/clang/include/clang/Config/config.h.cmake
@@ -20,6 +20,9 @@
/* Default OpenMP runtime used by -fopenmp. */
#define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}"
+/* Default architecture for OpenMP offloading to Nvidia GPUs. */
+#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}"
+
/* Multilib suffix for libdir. */
#define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}"
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 4d040a204dd..4f740fc9525 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -542,9 +542,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
// flags are not duplicated.
// Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
- for (Arg *A : Args){
+ for (Arg *A : Args) {
bool IsDuplicate = false;
- for (Arg *DALArg : *DAL){
+ for (Arg *DALArg : *DAL) {
if (A == DALArg) {
IsDuplicate = true;
break;
@@ -555,14 +555,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
- if (Arch.empty()) {
- // Default compute capability for CUDA toolchain is the
- // lowest compute capability supported by the installed
- // CUDA version.
- DAL->AddJoinedArg(nullptr,
- Opts.getOption(options::OPT_march_EQ),
- CudaInstallation.getLowestExistingArch());
- }
+ if (Arch.empty())
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+ CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
return DAL;
}
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 5144f5b40bb..1e30aa72702 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@ public:
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
- /// \brief Get lowest available compute capability
- /// for which a libdevice library exists.
- std::string getLowestExistingArch() const {
- std::string LibDeviceFile;
- for (auto key : LibDeviceMap.keys()) {
- LibDeviceFile = LibDeviceMap.lookup(key);
- if (!LibDeviceFile.empty())
- return key;
- }
- return "sm_20";
- }
};
namespace tools {
OpenPOWER on IntegriCloud