summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- clang/include/clang/Basic/Cuda.h 1
-rw-r--r-- clang/lib/Basic/Cuda.cpp 5
-rw-r--r-- clang/lib/Basic/Targets/AMDGPU.cpp 6
-rw-r--r-- clang/lib/Basic/Targets/NVPTX.cpp 1
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 2
-rw-r--r-- clang/test/CodeGenOpenCL/amdgpu-features.cl 2
-rw-r--r-- clang/test/Driver/amdgpu-macros.cl 7
-rw-r--r-- clang/test/Driver/amdgpu-mcpu.cl 2
-rw-r--r-- clang/test/Driver/cuda-bad-arch.cu 2
9 files changed, 28 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d96c7e0972f..df35e1b93ba 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -64,6 +64,7 @@ enum class CudaArch {
GFX902,
GFX904,
GFX906,
+ GFX908,
GFX909,
LAST,
};
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 4bc8d8c5b42..d19925ab107 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -109,6 +109,8 @@ const char *CudaArchToString(CudaArch A) {
return "gfx904";
case CudaArch::GFX906: // TBA
return "gfx906";
+ case CudaArch::GFX908: // TBA
+ return "gfx908";
case CudaArch::GFX909: // TBA
return "gfx909";
}
@@ -147,6 +149,7 @@ CudaArch StringToCudaArch(llvm::StringRef S) {
.Case("gfx902", CudaArch::GFX902)
.Case("gfx904", CudaArch::GFX904)
.Case("gfx906", CudaArch::GFX906)
+ .Case("gfx908", CudaArch::GFX908)
.Case("gfx909", CudaArch::GFX909)
.Default(CudaArch::UNKNOWN);
}
@@ -259,6 +262,7 @@ CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
+ case CudaArch::GFX908:
case CudaArch::GFX909:
return CudaVirtualArch::COMPUTE_AMDGCN;
}
@@ -306,6 +310,7 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
+ case CudaArch::GFX908:
case CudaArch::GFX909:
return CudaVersion::CUDA_70;
}
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 568f2677855..b5c82e28857 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -152,6 +152,12 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["gfx10-insts"] = true;
Features["s-memrealtime"] = true;
break;
+ case GK_GFX908:
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ LLVM_FALLTHROUGH;
case GK_GFX906:
Features["dl-insts"] = true;
Features["dot1-insts"] = true;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index c917e3d4198..8ca36fbcc38 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -191,6 +191,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
+ case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::LAST:
break;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 5183af1bdbf..88a86930ccd 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4928,6 +4928,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
+ case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::UNKNOWN:
break;
@@ -4982,6 +4983,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::GFX902:
case CudaArch::GFX904:
case CudaArch::GFX906:
+ case CudaArch::GFX908:
case CudaArch::GFX909:
case CudaArch::UNKNOWN:
break;
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index e963aed6f78..0bb3d6f3df7 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -5,6 +5,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -15,6 +16,7 @@
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index dc3917a4555..93b9e4fdf9d 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -175,6 +175,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX902 %s
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX904 %s
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX906 %s
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX908 %s
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX909 %s
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX1010 %s
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,GFX1011 %s
@@ -195,6 +196,7 @@
// GFX902-DAG: #define FP_FAST_FMA 1
// GFX904-DAG: #define FP_FAST_FMA 1
// GFX906-DAG: #define FP_FAST_FMA 1
+// GFX908-DAG: #define FP_FAST_FMA 1
// GFX909-DAG: #define FP_FAST_FMA 1
// GFX1010-DAG: #define FP_FAST_FMA 1
// GFX1011-DAG: #define FP_FAST_FMA 1
@@ -215,6 +217,7 @@
// GFX902-DAG: #define FP_FAST_FMAF 1
// GFX904-DAG: #define FP_FAST_FMAF 1
// GFX906-DAG: #define FP_FAST_FMAF 1
+// GFX908-DAG: #define FP_FAST_FMAF 1
// GFX909-DAG: #define FP_FAST_FMAF 1
// GFX1010-DAG: #define FP_FAST_FMAF 1
// GFX1011-DAG: #define FP_FAST_FMAF 1
@@ -239,6 +242,7 @@
// GFX902-DAG: #define __HAS_FMAF__ 1
// GFX904-DAG: #define __HAS_FMAF__ 1
// GFX906-DAG: #define __HAS_FMAF__ 1
+// GFX908-DAG: #define __HAS_FMAF__ 1
// GFX909-DAG: #define __HAS_FMAF__ 1
// GFX1010-DAG: #define __HAS_FMAF__ 1
// GFX1011-DAG: #define __HAS_FMAF__ 1
@@ -259,6 +263,7 @@
// GFX902-DAG: #define __HAS_FP64__ 1
// GFX904-DAG: #define __HAS_FP64__ 1
// GFX906-DAG: #define __HAS_FP64__ 1
+// GFX908-DAG: #define __HAS_FP64__ 1
// GFX909-DAG: #define __HAS_FP64__ 1
// GFX1010-DAG: #define __HAS_FP64__ 1
// GFX1011-DAG: #define __HAS_FP64__ 1
@@ -279,6 +284,7 @@
// GFX902-DAG: #define __HAS_LDEXPF__ 1
// GFX904-DAG: #define __HAS_LDEXPF__ 1
// GFX906-DAG: #define __HAS_LDEXPF__ 1
+// GFX908-DAG: #define __HAS_LDEXPF__ 1
// GFX909-DAG: #define __HAS_LDEXPF__ 1
// GFX1010-DAG: #define __HAS_LDEXPF__ 1
// GFX1011-DAG: #define __HAS_LDEXPF__ 1
@@ -299,6 +305,7 @@
// GFX902-DAG: #define __gfx902__ 1
// GFX904-DAG: #define __gfx904__ 1
// GFX906-DAG: #define __gfx906__ 1
+// GFX908-DAG: #define __gfx908__ 1
// GFX909-DAG: #define __gfx909__ 1
// GFX1010-DAG: #define __gfx1010__ 1
// GFX1011-DAG: #define __gfx1011__ 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index 8d76516dcd2..1559c7fafc7 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -84,6 +84,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx902 %s 2>&1 | FileCheck --check-prefix=GFX902 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck --check-prefix=GFX904 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck --check-prefix=GFX906 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefix=GFX908 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefix=GFX909 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
@@ -121,6 +122,7 @@
// GFX902: "-target-cpu" "gfx902"
// GFX904: "-target-cpu" "gfx904"
// GFX906: "-target-cpu" "gfx906"
+// GFX908: "-target-cpu" "gfx908"
// GFX909: "-target-cpu" "gfx909"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
index 00d955233be..321d67e75fe 100644
--- a/clang/test/Driver/cuda-bad-arch.cu
+++ b/clang/test/Driver/cuda-bad-arch.cu
@@ -23,6 +23,8 @@
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=gfx908 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
OpenPOWER on IntegriCloud