summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2017-09-07 18:14:32 +0000
committerArtem Belevich <tra@google.com>2017-09-07 18:14:32 +0000
commit8af4e23d1ee7e93e9bda835be2b97550a13c0737 (patch)
tree5510b5995366fd536e9a222f4fbc20af6b34472c
parent9420ec3378b8502434846e84ae14e57652a78279 (diff)
downloadbcm5719-llvm-8af4e23d1ee7e93e9bda835be2b97550a13c0737.tar.gz
bcm5719-llvm-8af4e23d1ee7e93e9bda835be2b97550a13c0737.zip
[CUDA] Added rudimentary support for CUDA-9 and sm_70.
For now CUDA-9 is not included in the list of CUDA versions clang searches for, so the path to CUDA-9 must be explicitly passed via --cuda-path=. On LLVM side NVPTX added sm_70 GPU type which bumps required PTX version to 6.0, but otherwise is equivalent to sm_62 at the moment. Differential Revision: https://reviews.llvm.org/D37576 llvm-svn: 312734
-rw-r--r--clang/include/clang/Basic/Cuda.h3
-rw-r--r--clang/lib/Basic/Cuda.cpp12
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp88
-rw-r--r--clang/lib/Headers/__clang_cuda_runtime_wrapper.h9
-rw-r--r--clang/test/Driver/cuda-arch-translation.cu36
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.td5
-rw-r--r--llvm/test/CodeGen/NVPTX/sm-version-70.ll5
8 files changed, 111 insertions, 49 deletions
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index ad1139b8c19..b273365975d 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -21,6 +21,7 @@ enum class CudaVersion {
CUDA_70,
CUDA_75,
CUDA_80,
+ CUDA_90,
};
const char *CudaVersionToString(CudaVersion V);
@@ -41,6 +42,7 @@ enum class CudaArch {
SM_60,
SM_61,
SM_62,
+ SM_70,
};
const char *CudaArchToString(CudaArch A);
@@ -60,6 +62,7 @@ enum class CudaVirtualArch {
COMPUTE_60,
COMPUTE_61,
COMPUTE_62,
+ COMPUTE_70,
};
const char *CudaVirtualArchToString(CudaVirtualArch A);
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 3264078b98f..3a5297b0c64 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -16,6 +16,8 @@ const char *CudaVersionToString(CudaVersion V) {
return "7.5";
case CudaVersion::CUDA_80:
return "8.0";
+ case CudaVersion::CUDA_90:
+ return "9.0";
}
llvm_unreachable("invalid enum");
}
@@ -48,6 +50,8 @@ const char *CudaArchToString(CudaArch A) {
return "sm_61";
case CudaArch::SM_62:
return "sm_62";
+ case CudaArch::SM_70:
+ return "sm_70";
}
llvm_unreachable("invalid enum");
}
@@ -66,6 +70,7 @@ CudaArch StringToCudaArch(llvm::StringRef S) {
.Case("sm_60", CudaArch::SM_60)
.Case("sm_61", CudaArch::SM_61)
.Case("sm_62", CudaArch::SM_62)
+ .Case("sm_70", CudaArch::SM_70)
.Default(CudaArch::UNKNOWN);
}
@@ -95,6 +100,8 @@ const char *CudaVirtualArchToString(CudaVirtualArch A) {
return "compute_61";
case CudaVirtualArch::COMPUTE_62:
return "compute_62";
+ case CudaVirtualArch::COMPUTE_70:
+ return "compute_70";
}
llvm_unreachable("invalid enum");
}
@@ -112,6 +119,7 @@ CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
.Case("compute_60", CudaVirtualArch::COMPUTE_60)
.Case("compute_61", CudaVirtualArch::COMPUTE_61)
.Case("compute_62", CudaVirtualArch::COMPUTE_62)
+ .Case("compute_70", CudaVirtualArch::COMPUTE_70)
.Default(CudaVirtualArch::UNKNOWN);
}
@@ -142,6 +150,8 @@ CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
return CudaVirtualArch::COMPUTE_61;
case CudaArch::SM_62:
return CudaVirtualArch::COMPUTE_62;
+ case CudaArch::SM_70:
+ return CudaVirtualArch::COMPUTE_70;
}
llvm_unreachable("invalid enum");
}
@@ -164,6 +174,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
case CudaArch::SM_61:
case CudaArch::SM_62:
return CudaVersion::CUDA_80;
+ case CudaArch::SM_70:
+ return CudaVersion::CUDA_90;
}
llvm_unreachable("invalid enum");
}
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 92e21c34b8f..3889f097350 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -183,6 +183,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
return "610";
case CudaArch::SM_62:
return "620";
+ case CudaArch::SM_70:
+ return "700";
}
llvm_unreachable("unhandled CudaArch");
}();
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 49a23028659..91bec1d55a1 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -49,6 +49,8 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
return CudaVersion::CUDA_75;
if (Major == 8 && Minor == 0)
return CudaVersion::CUDA_80;
+ if (Major == 9 && Minor == 0)
+ return CudaVersion::CUDA_90;
return CudaVersion::UNKNOWN;
}
@@ -112,43 +114,55 @@ CudaInstallationDetector::CudaInstallationDetector(
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
}
- std::error_code EC;
- for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
- !EC && LI != LE; LI = LI.increment(EC)) {
- StringRef FilePath = LI->path();
- StringRef FileName = llvm::sys::path::filename(FilePath);
- // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
- const StringRef LibDeviceName = "libdevice.";
- if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
- continue;
- StringRef GpuArch = FileName.slice(
- LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
- LibDeviceMap[GpuArch] = FilePath.str();
- // Insert map entries for specifc devices with this compute
- // capability. NVCC's choice of the libdevice library version is
- // rather peculiar and depends on the CUDA version.
- if (GpuArch == "compute_20") {
- LibDeviceMap["sm_20"] = FilePath;
- LibDeviceMap["sm_21"] = FilePath;
- LibDeviceMap["sm_32"] = FilePath;
- } else if (GpuArch == "compute_30") {
- LibDeviceMap["sm_30"] = FilePath;
- if (Version < CudaVersion::CUDA_80) {
- LibDeviceMap["sm_50"] = FilePath;
- LibDeviceMap["sm_52"] = FilePath;
- LibDeviceMap["sm_53"] = FilePath;
- }
- LibDeviceMap["sm_60"] = FilePath;
- LibDeviceMap["sm_61"] = FilePath;
- LibDeviceMap["sm_62"] = FilePath;
- } else if (GpuArch == "compute_35") {
- LibDeviceMap["sm_35"] = FilePath;
- LibDeviceMap["sm_37"] = FilePath;
- } else if (GpuArch == "compute_50") {
- if (Version >= CudaVersion::CUDA_80) {
- LibDeviceMap["sm_50"] = FilePath;
- LibDeviceMap["sm_52"] = FilePath;
- LibDeviceMap["sm_53"] = FilePath;
+ if (Version == CudaVersion::CUDA_90) {
+ // CUDA-9 uses single libdevice file for all GPU variants.
+ std::string FilePath = LibDevicePath + "/libdevice.10.bc";
+ if (FS.exists(FilePath)) {
+ for (const char *GpuArch :
+ {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
+ "sm_60", "sm_61", "sm_62", "sm_70"})
+ LibDeviceMap[GpuArch] = FilePath;
+ }
+ } else {
+ std::error_code EC;
+ for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
+ !EC && LI != LE; LI = LI.increment(EC)) {
+ StringRef FilePath = LI->path();
+ StringRef FileName = llvm::sys::path::filename(FilePath);
+ // Process all bitcode filenames that look like
+ // libdevice.compute_XX.YY.bc
+ const StringRef LibDeviceName = "libdevice.";
+ if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
+ continue;
+ StringRef GpuArch = FileName.slice(
+ LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
+ LibDeviceMap[GpuArch] = FilePath.str();
+ // Insert map entries for specifc devices with this compute
+ // capability. NVCC's choice of the libdevice library version is
+ // rather peculiar and depends on the CUDA version.
+ if (GpuArch == "compute_20") {
+ LibDeviceMap["sm_20"] = FilePath;
+ LibDeviceMap["sm_21"] = FilePath;
+ LibDeviceMap["sm_32"] = FilePath;
+ } else if (GpuArch == "compute_30") {
+ LibDeviceMap["sm_30"] = FilePath;
+ if (Version < CudaVersion::CUDA_80) {
+ LibDeviceMap["sm_50"] = FilePath;
+ LibDeviceMap["sm_52"] = FilePath;
+ LibDeviceMap["sm_53"] = FilePath;
+ }
+ LibDeviceMap["sm_60"] = FilePath;
+ LibDeviceMap["sm_61"] = FilePath;
+ LibDeviceMap["sm_62"] = FilePath;
+ } else if (GpuArch == "compute_35") {
+ LibDeviceMap["sm_35"] = FilePath;
+ LibDeviceMap["sm_37"] = FilePath;
+ } else if (GpuArch == "compute_50") {
+ if (Version >= CudaVersion::CUDA_80) {
+ LibDeviceMap["sm_50"] = FilePath;
+ LibDeviceMap["sm_52"] = FilePath;
+ LibDeviceMap["sm_53"] = FilePath;
+ }
}
}
}
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 931d44b6965..b5b173cd0cd 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -62,7 +62,7 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000
#error "Unsupported CUDA version!"
#endif
@@ -86,7 +86,11 @@
#define __COMMON_FUNCTIONS_H__
#undef __CUDACC__
+#if CUDA_VERSION < 9000
#define __CUDABE__
+#else
+#define __CUDA_LIBDEVICE__
+#endif
// Disables definitions of device-side runtime support stubs in
// cuda_device_runtime_api.h
#include "driver_types.h"
@@ -94,6 +98,7 @@
#include "host_defines.h"
#undef __CUDABE__
+#undef __CUDA_LIBDEVICE__
#define __CUDACC__
#include "cuda_runtime.h"
@@ -105,7 +110,9 @@
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
+#if CUDA_VERSION < 9000
#include "crt/device_runtime.h"
+#endif
#include "crt/host_runtime.h"
// device_runtime.h defines __cxa_* macros that will conflict with
// cxxabi.h.
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index 64ddb3178cd..a26d3740ff1 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -5,26 +5,36 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
-// CHECK:fatbinary
-
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM20 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_21 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM21 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM21 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_30 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM30 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM30 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_32 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM32 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM32 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_35 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM35 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_37 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM37 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM37 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_50 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM50 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM50 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_52 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM52 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM52 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_53 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM53 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM53 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_60 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM60 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_61 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM61 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_62 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM62 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM70 %s
+
+// COMMON: ptxas
+// COMMON-SAME: -m64
+// COMMON: fatbinary
// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
@@ -35,3 +45,7 @@
// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
+// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
+// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
+// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
+// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index c77ddbc9978..aba37d36359 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -50,6 +50,8 @@ def SM61 : SubtargetFeature<"sm_61", "SmVersion", "61",
"Target SM 6.1">;
def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62",
"Target SM 6.2">;
+def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
+ "Target SM 7.0">;
def SATOM : SubtargetFeature<"satom", "HasAtomScope", "true",
"Atomic operations with scope">;
@@ -67,6 +69,8 @@ def PTX43 : SubtargetFeature<"ptx43", "PTXVersion", "43",
"Use PTX version 4.3">;
def PTX50 : SubtargetFeature<"ptx50", "PTXVersion", "50",
"Use PTX version 5.0">;
+def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
+ "Use PTX version 6.0">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -87,6 +91,7 @@ def : Proc<"sm_53", [SM53, PTX42]>;
def : Proc<"sm_60", [SM60, PTX50, SATOM]>;
def : Proc<"sm_61", [SM61, PTX50, SATOM]>;
def : Proc<"sm_62", [SM62, PTX50, SATOM]>;
+def : Proc<"sm_70", [SM70, PTX60, SATOM]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/llvm/test/CodeGen/NVPTX/sm-version-70.ll b/llvm/test/CodeGen/NVPTX/sm-version-70.ll
new file mode 100644
index 00000000000..8b72d50747a
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/sm-version-70.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
+
+; CHECK: .version 6.0
+; CHECK: .target sm_70
OpenPOWER on IntegriCloud