summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- clang/include/clang/Basic/Attr.td 6
-rw-r--r-- clang/include/clang/Basic/AttrDocs.td 27
-rw-r--r-- clang/lib/CodeGen/CGCall.cpp 2
-rw-r--r-- clang/lib/Headers/opencl-c.h 335
-rw-r--r-- clang/lib/Sema/SemaDeclAttr.cpp 3
-rw-r--r-- clang/test/CodeGenOpenCL/convergent.cl 118
-rw-r--r-- clang/test/SemaOpenCL/convergent.cl 12
7 files changed, 336 insertions, 167 deletions
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 709af0a6dbd..11f4a13ab14 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1026,6 +1026,12 @@ def NoDuplicate : InheritableAttr {
let Documentation = [NoDuplicateDocs];
}
+def Convergent : InheritableAttr {
+ let Spellings = [GNU<"convergent">, CXX11<"clang", "convergent">];
+ let Subjects = SubjectList<[Function]>;
+ let Documentation = [ConvergentDocs];
+}
+
def NoInline : InheritableAttr {
let Spellings = [GCC<"noinline">, Declspec<"noinline">];
let Subjects = SubjectList<[Function]>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index f6ebda76566..6253d5a8bc4 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -606,6 +606,33 @@ of the condition.
}];
}
+def ConvergentDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+The ``convergent`` attribute can be placed on a function declaration. It is
+translated into the LLVM ``convergent`` attribute, which indicates that the call
+instructions of a function with this attribute cannot be made control-dependent
+on any additional values.
+
+In languages designed for the SPMD/SIMT programming model, e.g. OpenCL or CUDA,
+the call instructions of a function with this attribute must be executed by
+all work items or threads in a work group or sub group.
+
+This attribute is different from ``noduplicate`` because it allows duplicating
+function calls if it can be proved that the duplicated function calls are
+not made control-dependent on any additional values, e.g., unrolling a loop
+executed by all work items.
+
+Sample usage:
+
+.. code-block:: c
+
+ void convfunc(void) __attribute__((convergent));
+ // Setting it as a C++11 attribute is also valid in a C++ program.
+ // [[clang::convergent]] void convfunc(void);
+
+ }];
+}
+
def NoSplitStackDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 86aaad04361..d57fb2eac2e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1648,6 +1648,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (TargetDecl->hasAttr<NoDuplicateAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
+ if (TargetDecl->hasAttr<ConvergentAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::Convergent);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 3eb6dc712f3..004eca35a22 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17,6 +17,7 @@
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define __ovld __attribute__((overloadable))
+#define __conv __attribute__((convergent))
// Optimizations
#define __purefn __attribute__((pure))
@@ -13822,7 +13823,7 @@ typedef uint cl_mem_fence_flags;
* image objects and then want to read the updated data.
*/
-void __ovld barrier(cl_mem_fence_flags flags);
+void __ovld __conv barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -13835,8 +13836,8 @@ typedef enum memory_scope
memory_scope_sub_group
} memory_scope;
-void __ovld work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
-void __ovld work_group_barrier(cl_mem_fence_flags flags);
+void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions
@@ -16568,101 +16569,101 @@ int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
// OpenCL v2.0 s6.13.15 - Work-group Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
-int __ovld work_group_all(int predicate);
-int __ovld work_group_any(int predicate);
+int __ovld __conv work_group_all(int predicate);
+int __ovld __conv work_group_any(int predicate);
#ifdef cl_khr_fp16
-half __ovld work_group_broadcast(half a, size_t local_id);
-half __ovld work_group_broadcast(half a, size_t x, size_t y);
-half __ovld work_group_broadcast(half a, size_t x, size_t y, size_t z);
+half __ovld __conv work_group_broadcast(half a, size_t local_id);
+half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);
+half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);
#endif
-int __ovld work_group_broadcast(int a, size_t local_id);
-int __ovld work_group_broadcast(int a, size_t x, size_t y);
-int __ovld work_group_broadcast(int a, size_t x, size_t y, size_t z);
-uint __ovld work_group_broadcast(uint a, size_t local_id);
-uint __ovld work_group_broadcast(uint a, size_t x, size_t y);
-uint __ovld work_group_broadcast(uint a, size_t x, size_t y, size_t z);
-long __ovld work_group_broadcast(long a, size_t local_id);
-long __ovld work_group_broadcast(long a, size_t x, size_t y);
-long __ovld work_group_broadcast(long a, size_t x, size_t y, size_t z);
-ulong __ovld work_group_broadcast(ulong a, size_t local_id);
-ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y);
-ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
-float __ovld work_group_broadcast(float a, size_t local_id);
-float __ovld work_group_broadcast(float a, size_t x, size_t y);
-float __ovld work_group_broadcast(float a, size_t x, size_t y, size_t z);
+int __ovld __conv work_group_broadcast(int a, size_t local_id);
+int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);
+int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);
+uint __ovld __conv work_group_broadcast(uint a, size_t local_id);
+uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);
+uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);
+long __ovld __conv work_group_broadcast(long a, size_t local_id);
+long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);
+long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);
+ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);
+ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);
+ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
+float __ovld __conv work_group_broadcast(float a, size_t local_id);
+float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);
+float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);
#ifdef cl_khr_fp64
-double __ovld work_group_broadcast(double a, size_t local_id);
-double __ovld work_group_broadcast(double a, size_t x, size_t y);
-double __ovld work_group_broadcast(double a, size_t x, size_t y, size_t z);
+double __ovld __conv work_group_broadcast(double a, size_t local_id);
+double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);
+double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);
#endif //cl_khr_fp64
#ifdef cl_khr_fp16
-half __ovld work_group_reduce_add(half x);
-half __ovld work_group_reduce_min(half x);
-half __ovld work_group_reduce_max(half x);
-half __ovld work_group_scan_exclusive_add(half x);
-half __ovld work_group_scan_exclusive_min(half x);
-half __ovld work_group_scan_exclusive_max(half x);
-half __ovld work_group_scan_inclusive_add(half x);
-half __ovld work_group_scan_inclusive_min(half x);
-half __ovld work_group_scan_inclusive_max(half x);
+half __ovld __conv work_group_reduce_add(half x);
+half __ovld __conv work_group_reduce_min(half x);
+half __ovld __conv work_group_reduce_max(half x);
+half __ovld __conv work_group_scan_exclusive_add(half x);
+half __ovld __conv work_group_scan_exclusive_min(half x);
+half __ovld __conv work_group_scan_exclusive_max(half x);
+half __ovld __conv work_group_scan_inclusive_add(half x);
+half __ovld __conv work_group_scan_inclusive_min(half x);
+half __ovld __conv work_group_scan_inclusive_max(half x);
#endif
-int __ovld work_group_reduce_add(int x);
-int __ovld work_group_reduce_min(int x);
-int __ovld work_group_reduce_max(int x);
-int __ovld work_group_scan_exclusive_add(int x);
-int __ovld work_group_scan_exclusive_min(int x);
-int __ovld work_group_scan_exclusive_max(int x);
-int __ovld work_group_scan_inclusive_add(int x);
-int __ovld work_group_scan_inclusive_min(int x);
-int __ovld work_group_scan_inclusive_max(int x);
-uint __ovld work_group_reduce_add(uint x);
-uint __ovld work_group_reduce_min(uint x);
-uint __ovld work_group_reduce_max(uint x);
-uint __ovld work_group_scan_exclusive_add(uint x);
-uint __ovld work_group_scan_exclusive_min(uint x);
-uint __ovld work_group_scan_exclusive_max(uint x);
-uint __ovld work_group_scan_inclusive_add(uint x);
-uint __ovld work_group_scan_inclusive_min(uint x);
-uint __ovld work_group_scan_inclusive_max(uint x);
-long __ovld work_group_reduce_add(long x);
-long __ovld work_group_reduce_min(long x);
-long __ovld work_group_reduce_max(long x);
-long __ovld work_group_scan_exclusive_add(long x);
-long __ovld work_group_scan_exclusive_min(long x);
-long __ovld work_group_scan_exclusive_max(long x);
-long __ovld work_group_scan_inclusive_add(long x);
-long __ovld work_group_scan_inclusive_min(long x);
-long __ovld work_group_scan_inclusive_max(long x);
-ulong __ovld work_group_reduce_add(ulong x);
-ulong __ovld work_group_reduce_min(ulong x);
-ulong __ovld work_group_reduce_max(ulong x);
-ulong __ovld work_group_scan_exclusive_add(ulong x);
-ulong __ovld work_group_scan_exclusive_min(ulong x);
-ulong __ovld work_group_scan_exclusive_max(ulong x);
-ulong __ovld work_group_scan_inclusive_add(ulong x);
-ulong __ovld work_group_scan_inclusive_min(ulong x);
-ulong __ovld work_group_scan_inclusive_max(ulong x);
-float __ovld work_group_reduce_add(float x);
-float __ovld work_group_reduce_min(float x);
-float __ovld work_group_reduce_max(float x);
-float __ovld work_group_scan_exclusive_add(float x);
-float __ovld work_group_scan_exclusive_min(float x);
-float __ovld work_group_scan_exclusive_max(float x);
-float __ovld work_group_scan_inclusive_add(float x);
-float __ovld work_group_scan_inclusive_min(float x);
-float __ovld work_group_scan_inclusive_max(float x);
+int __ovld __conv work_group_reduce_add(int x);
+int __ovld __conv work_group_reduce_min(int x);
+int __ovld __conv work_group_reduce_max(int x);
+int __ovld __conv work_group_scan_exclusive_add(int x);
+int __ovld __conv work_group_scan_exclusive_min(int x);
+int __ovld __conv work_group_scan_exclusive_max(int x);
+int __ovld __conv work_group_scan_inclusive_add(int x);
+int __ovld __conv work_group_scan_inclusive_min(int x);
+int __ovld __conv work_group_scan_inclusive_max(int x);
+uint __ovld __conv work_group_reduce_add(uint x);
+uint __ovld __conv work_group_reduce_min(uint x);
+uint __ovld __conv work_group_reduce_max(uint x);
+uint __ovld __conv work_group_scan_exclusive_add(uint x);
+uint __ovld __conv work_group_scan_exclusive_min(uint x);
+uint __ovld __conv work_group_scan_exclusive_max(uint x);
+uint __ovld __conv work_group_scan_inclusive_add(uint x);
+uint __ovld __conv work_group_scan_inclusive_min(uint x);
+uint __ovld __conv work_group_scan_inclusive_max(uint x);
+long __ovld __conv work_group_reduce_add(long x);
+long __ovld __conv work_group_reduce_min(long x);
+long __ovld __conv work_group_reduce_max(long x);
+long __ovld __conv work_group_scan_exclusive_add(long x);
+long __ovld __conv work_group_scan_exclusive_min(long x);
+long __ovld __conv work_group_scan_exclusive_max(long x);
+long __ovld __conv work_group_scan_inclusive_add(long x);
+long __ovld __conv work_group_scan_inclusive_min(long x);
+long __ovld __conv work_group_scan_inclusive_max(long x);
+ulong __ovld __conv work_group_reduce_add(ulong x);
+ulong __ovld __conv work_group_reduce_min(ulong x);
+ulong __ovld __conv work_group_reduce_max(ulong x);
+ulong __ovld __conv work_group_scan_exclusive_add(ulong x);
+ulong __ovld __conv work_group_scan_exclusive_min(ulong x);
+ulong __ovld __conv work_group_scan_exclusive_max(ulong x);
+ulong __ovld __conv work_group_scan_inclusive_add(ulong x);
+ulong __ovld __conv work_group_scan_inclusive_min(ulong x);
+ulong __ovld __conv work_group_scan_inclusive_max(ulong x);
+float __ovld __conv work_group_reduce_add(float x);
+float __ovld __conv work_group_reduce_min(float x);
+float __ovld __conv work_group_reduce_max(float x);
+float __ovld __conv work_group_scan_exclusive_add(float x);
+float __ovld __conv work_group_scan_exclusive_min(float x);
+float __ovld __conv work_group_scan_exclusive_max(float x);
+float __ovld __conv work_group_scan_inclusive_add(float x);
+float __ovld __conv work_group_scan_inclusive_min(float x);
+float __ovld __conv work_group_scan_inclusive_max(float x);
#ifdef cl_khr_fp64
-double __ovld work_group_reduce_add(double x);
-double __ovld work_group_reduce_min(double x);
-double __ovld work_group_reduce_max(double x);
-double __ovld work_group_scan_exclusive_add(double x);
-double __ovld work_group_scan_exclusive_min(double x);
-double __ovld work_group_scan_exclusive_max(double x);
-double __ovld work_group_scan_inclusive_add(double x);
-double __ovld work_group_scan_inclusive_min(double x);
-double __ovld work_group_scan_inclusive_max(double x);
+double __ovld __conv work_group_reduce_add(double x);
+double __ovld __conv work_group_reduce_min(double x);
+double __ovld __conv work_group_reduce_max(double x);
+double __ovld __conv work_group_scan_exclusive_add(double x);
+double __ovld __conv work_group_scan_exclusive_min(double x);
+double __ovld __conv work_group_scan_exclusive_max(double x);
+double __ovld __conv work_group_scan_inclusive_add(double x);
+double __ovld __conv work_group_scan_inclusive_min(double x);
+double __ovld __conv work_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -16762,92 +16763,92 @@ uint __ovld get_enqueued_num_sub_groups(void);
uint __ovld get_sub_group_id(void);
uint __ovld get_sub_group_local_id(void);
-void __ovld sub_group_barrier(cl_mem_fence_flags flags);
+void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
-void __ovld sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
-int __ovld sub_group_all(int predicate);
-int __ovld sub_group_any(int predicate);
-
-int __ovld sub_group_broadcast(int x, uint sub_group_local_id);
-uint __ovld sub_group_broadcast(uint x, uint sub_group_local_id);
-long __ovld sub_group_broadcast(long x, uint sub_group_local_id);
-ulong __ovld sub_group_broadcast(ulong x, uint sub_group_local_id);
-float __ovld sub_group_broadcast(float x, uint sub_group_local_id);
-
-int __ovld sub_group_reduce_add(int x);
-uint __ovld sub_group_reduce_add(uint x);
-long __ovld sub_group_reduce_add(long x);
-ulong __ovld sub_group_reduce_add(ulong x);
-float __ovld sub_group_reduce_add(float x);
-int __ovld sub_group_reduce_min(int x);
-uint __ovld sub_group_reduce_min(uint x);
-long __ovld sub_group_reduce_min(long x);
-ulong __ovld sub_group_reduce_min(ulong x);
-float __ovld sub_group_reduce_min(float x);
-int __ovld sub_group_reduce_max(int x);
-uint __ovld sub_group_reduce_max(uint x);
-long __ovld sub_group_reduce_max(long x);
-ulong __ovld sub_group_reduce_max(ulong x);
-float __ovld sub_group_reduce_max(float x);
-
-int __ovld sub_group_scan_exclusive_add(int x);
-uint __ovld sub_group_scan_exclusive_add(uint x);
-long __ovld sub_group_scan_exclusive_add(long x);
-ulong __ovld sub_group_scan_exclusive_add(ulong x);
-float __ovld sub_group_scan_exclusive_add(float x);
-int __ovld sub_group_scan_exclusive_min(int x);
-uint __ovld sub_group_scan_exclusive_min(uint x);
-long __ovld sub_group_scan_exclusive_min(long x);
-ulong __ovld sub_group_scan_exclusive_min(ulong x);
-float __ovld sub_group_scan_exclusive_min(float x);
-int __ovld sub_group_scan_exclusive_max(int x);
-uint __ovld sub_group_scan_exclusive_max(uint x);
-long __ovld sub_group_scan_exclusive_max(long x);
-ulong __ovld sub_group_scan_exclusive_max(ulong x);
-float __ovld sub_group_scan_exclusive_max(float x);
-
-int __ovld sub_group_scan_inclusive_add(int x);
-uint __ovld sub_group_scan_inclusive_add(uint x);
-long __ovld sub_group_scan_inclusive_add(long x);
-ulong __ovld sub_group_scan_inclusive_add(ulong x);
-float __ovld sub_group_scan_inclusive_add(float x);
-int __ovld sub_group_scan_inclusive_min(int x);
-uint __ovld sub_group_scan_inclusive_min(uint x);
-long __ovld sub_group_scan_inclusive_min(long x);
-ulong __ovld sub_group_scan_inclusive_min(ulong x);
-float __ovld sub_group_scan_inclusive_min(float x);
-int __ovld sub_group_scan_inclusive_max(int x);
-uint __ovld sub_group_scan_inclusive_max(uint x);
-long __ovld sub_group_scan_inclusive_max(long x);
-ulong __ovld sub_group_scan_inclusive_max(ulong x);
-float __ovld sub_group_scan_inclusive_max(float x);
+int __ovld __conv sub_group_all(int predicate);
+int __ovld __conv sub_group_any(int predicate);
+
+int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);
+uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);
+long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);
+ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);
+float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);
+
+int __ovld __conv sub_group_reduce_add(int x);
+uint __ovld __conv sub_group_reduce_add(uint x);
+long __ovld __conv sub_group_reduce_add(long x);
+ulong __ovld __conv sub_group_reduce_add(ulong x);
+float __ovld __conv sub_group_reduce_add(float x);
+int __ovld __conv sub_group_reduce_min(int x);
+uint __ovld __conv sub_group_reduce_min(uint x);
+long __ovld __conv sub_group_reduce_min(long x);
+ulong __ovld __conv sub_group_reduce_min(ulong x);
+float __ovld __conv sub_group_reduce_min(float x);
+int __ovld __conv sub_group_reduce_max(int x);
+uint __ovld __conv sub_group_reduce_max(uint x);
+long __ovld __conv sub_group_reduce_max(long x);
+ulong __ovld __conv sub_group_reduce_max(ulong x);
+float __ovld __conv sub_group_reduce_max(float x);
+
+int __ovld __conv sub_group_scan_exclusive_add(int x);
+uint __ovld __conv sub_group_scan_exclusive_add(uint x);
+long __ovld __conv sub_group_scan_exclusive_add(long x);
+ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);
+float __ovld __conv sub_group_scan_exclusive_add(float x);
+int __ovld __conv sub_group_scan_exclusive_min(int x);
+uint __ovld __conv sub_group_scan_exclusive_min(uint x);
+long __ovld __conv sub_group_scan_exclusive_min(long x);
+ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);
+float __ovld __conv sub_group_scan_exclusive_min(float x);
+int __ovld __conv sub_group_scan_exclusive_max(int x);
+uint __ovld __conv sub_group_scan_exclusive_max(uint x);
+long __ovld __conv sub_group_scan_exclusive_max(long x);
+ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);
+float __ovld __conv sub_group_scan_exclusive_max(float x);
+
+int __ovld __conv sub_group_scan_inclusive_add(int x);
+uint __ovld __conv sub_group_scan_inclusive_add(uint x);
+long __ovld __conv sub_group_scan_inclusive_add(long x);
+ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);
+float __ovld __conv sub_group_scan_inclusive_add(float x);
+int __ovld __conv sub_group_scan_inclusive_min(int x);
+uint __ovld __conv sub_group_scan_inclusive_min(uint x);
+long __ovld __conv sub_group_scan_inclusive_min(long x);
+ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);
+float __ovld __conv sub_group_scan_inclusive_min(float x);
+int __ovld __conv sub_group_scan_inclusive_max(int x);
+uint __ovld __conv sub_group_scan_inclusive_max(uint x);
+long __ovld __conv sub_group_scan_inclusive_max(long x);
+ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);
+float __ovld __conv sub_group_scan_inclusive_max(float x);
#ifdef cl_khr_fp16
-half __ovld sub_group_broadcast(half x, uint sub_group_local_id);
-half __ovld sub_group_reduce_add(half x);
-half __ovld sub_group_reduce_min(half x);
-half __ovld sub_group_reduce_max(half x);
-half __ovld sub_group_scan_exclusive_add(half x);
-half __ovld sub_group_scan_exclusive_min(half x);
-half __ovld sub_group_scan_exclusive_max(half x);
-half __ovld sub_group_scan_inclusive_add(half x);
-half __ovld sub_group_scan_inclusive_min(half x);
-half __ovld sub_group_scan_inclusive_max(half x);
+half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);
+half __ovld __conv sub_group_reduce_add(half x);
+half __ovld __conv sub_group_reduce_min(half x);
+half __ovld __conv sub_group_reduce_max(half x);
+half __ovld __conv sub_group_scan_exclusive_add(half x);
+half __ovld __conv sub_group_scan_exclusive_min(half x);
+half __ovld __conv sub_group_scan_exclusive_max(half x);
+half __ovld __conv sub_group_scan_inclusive_add(half x);
+half __ovld __conv sub_group_scan_inclusive_min(half x);
+half __ovld __conv sub_group_scan_inclusive_max(half x);
#endif //cl_khr_fp16
#ifdef cl_khr_fp64
-double __ovld sub_group_broadcast(double x, uint sub_group_local_id);
-double __ovld sub_group_reduce_add(double x);
-double __ovld sub_group_reduce_min(double x);
-double __ovld sub_group_reduce_max(double x);
-double __ovld sub_group_scan_exclusive_add(double x);
-double __ovld sub_group_scan_exclusive_min(double x);
-double __ovld sub_group_scan_exclusive_max(double x);
-double __ovld sub_group_scan_inclusive_add(double x);
-double __ovld sub_group_scan_inclusive_min(double x);
-double __ovld sub_group_scan_inclusive_max(double x);
+double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);
+double __ovld __conv sub_group_reduce_add(double x);
+double __ovld __conv sub_group_reduce_min(double x);
+double __ovld __conv sub_group_reduce_max(double x);
+double __ovld __conv sub_group_scan_exclusive_add(double x);
+double __ovld __conv sub_group_scan_exclusive_min(double x);
+double __ovld __conv sub_group_scan_exclusive_max(double x);
+double __ovld __conv sub_group_scan_inclusive_add(double x);
+double __ovld __conv sub_group_scan_inclusive_min(double x);
+double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
#endif //cl_khr_subgroups cl_intel_subgroups
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index df5720fc2c7..917d414f967 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5857,6 +5857,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_NoDuplicate:
handleSimpleAttribute<NoDuplicateAttr>(S, D, Attr);
break;
+ case AttributeList::AT_Convergent:
+ handleSimpleAttribute<ConvergentAttr>(S, D, Attr);
+ break;
case AttributeList::AT_NoInline:
handleSimpleAttribute<NoInlineAttr>(S, D, Attr);
break;
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
new file mode 100644
index 00000000000..d2ae31fb741
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+void convfun(void) __attribute__((convergent));
+void non_convfun(void);
+void nodupfun(void) __attribute__((noduplicate));
+
+void f(void);
+void g(void);
+
+// Test two if's are merged and non_convfun duplicated.
+// The LLVM IR is equivalent to:
+// if (a) {
+// f();
+// non_convfun();
+// g();
+// } else {
+// non_convfun();
+// }
+//
+// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
+// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+// CHECK: [[if_then]]:
+// CHECK: tail call spir_func void @f()
+// CHECK: tail call spir_func void @non_convfun()
+// CHECK: tail call spir_func void @g()
+// CHECK: br label %[[if_end3:.+]]
+// CHECK: [[if_end3_critedge]]:
+// CHECK: tail call spir_func void @non_convfun()
+// CHECK: br label %[[if_end3]]
+// CHECK: [[if_end3]]:
+// CHECK-LABEL: ret void
+
+void test_merge_if(int a) {
+ if (a) {
+ f();
+ }
+ non_convfun();
+ if (a) {
+ g();
+ }
+}
+
+// CHECK-DAG: declare spir_func void @f()
+// CHECK-DAG: declare spir_func void @non_convfun()
+// CHECK-DAG: declare spir_func void @g()
+
+// Test two if's are not merged.
+// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
+// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
+// CHECK: [[if_then]]:
+// CHECK: tail call spir_func void @f()
+// CHECK-NOT: call spir_func void @convfun()
+// CHECK-NOT: call spir_func void @g()
+// CHECK: br label %[[if_end]]
+// CHECK: [[if_end]]:
+// CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
+// CHECK: tail call spir_func void @convfun() #[[attr5:.+]]
+// CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
+// CHECK: [[if_then2]]:
+// CHECK: tail call spir_func void @g()
+// CHECK: br label %[[if_end3:.+]]
+// CHECK: [[if_end3]]:
+// CHECK-LABEL: ret void
+
+void test_no_merge_if(int a) {
+ if (a) {
+ f();
+ }
+ convfun();
+ if(a) {
+ g();
+ }
+}
+
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
+
+// Test loop is unrolled for convergent function.
+// CHECK-LABEL: define spir_func void @test_unroll()
+// CHECK: tail call spir_func void @convfun() #[[attr5:[0-9]+]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK-LABEL: ret void
+
+void test_unroll() {
+ for (int i = 0; i < 10; i++)
+ convfun();
+}
+
+// Test loop is not unrolled for noduplicate function.
+// CHECK-LABEL: define spir_func void @test_not_unroll()
+// CHECK: br label %[[for_body:.+]]
+// CHECK: [[for_cond_cleanup:.+]]:
+// CHECK: ret void
+// CHECK: [[for_body]]:
+// CHECK: tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
+// CHECK-NOT: call spir_func void @nodupfun()
+// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
+
+void test_not_unroll() {
+ for (int i = 0; i < 10; i++)
+ nodupfun();
+}
+
+// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
+
+// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
+// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }
diff --git a/clang/test/SemaOpenCL/convergent.cl b/clang/test/SemaOpenCL/convergent.cl
new file mode 100644
index 00000000000..1b7fda41fc0
--- /dev/null
+++ b/clang/test/SemaOpenCL/convergent.cl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -fsyntax-only -verify %s
+
+void f1(void) __attribute__((convergent));
+
+void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}}
+
+void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}}
+
+void f4(void) {
+ int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}}
+}
+
OpenPOWER on IntegriCloud