summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Driver/Options.td2
-rw-r--r--clang/include/clang/Frontend/CodeGenOptions.def1
-rw-r--r--clang/lib/CodeGen/CGCall.cpp15
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp1
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp2
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl2
-rw-r--r--clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl16
-rw-r--r--clang/test/CodeGenOpenCL/convergent.cl5
-rw-r--r--clang/test/Driver/opencl.cl2
9 files changed, 43 insertions, 3 deletions
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a1b9810c547..50865f22f86 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -518,6 +518,8 @@ def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">;
+def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
+ HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">;
def client__name : JoinedOrSeparate<["-"], "client_name">;
def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>;
def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
diff --git a/clang/include/clang/Frontend/CodeGenOptions.def b/clang/include/clang/Frontend/CodeGenOptions.def
index 599b57c189f..e987252b2ff 100644
--- a/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/clang/include/clang/Frontend/CodeGenOptions.def
@@ -128,6 +128,7 @@ CODEGENOPT(NoTrappingMath , 1, 0) ///< Set when -fno-trapping-math is enabled
CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN.
CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size
CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
/// \brief Method of Objective-C dispatch to use.
ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d62b080e31c..959df69f22e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1870,6 +1870,21 @@ void CodeGenModule::ConstructAttributeList(
}
}
+ if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
+ if (getLangOpts().OpenCLVersion <= 120) {
+ // OpenCL v1.2 Work groups are always uniform
+ FuncAttrs.addAttribute("uniform-work-group-size", "true");
+ } else {
+ // OpenCL v2.0 Work groups may be whether uniform or not.
+ // '-cl-uniform-work-group-size' compile option gets a hint
+ // to the compiler that the global work-size be a multiple of
+ // the work-group size specified to clEnqueueNDRangeKernel
+ // (i.e. work groups are uniform).
+ FuncAttrs.addAttribute("uniform-work-group-size",
+ llvm::toStringRef(CodeGenOpts.UniformWGSize));
+ }
+ }
+
if (!AttrOnCallSite) {
bool DisableTailCalls =
CodeGenOpts.DisableTailCalls ||
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 64bea9e56ab..cc5cf0f042f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2379,6 +2379,7 @@ static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs) {
options::OPT_cl_no_signed_zeros,
options::OPT_cl_denorms_are_zero,
options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_cl_uniform_work_group_size
};
if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 3bdc116b963..5be02c96826 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -659,6 +659,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
Opts.CorrectlyRoundedDivSqrt =
Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
+ Opts.UniformWGSize =
+ Args.hasArg(OPT_cl_uniform_work_group_size);
Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 1027fd740c5..aec00e76014 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -425,7 +425,7 @@ struct_char_arr32 func_ret_struct_char_arr32()
return s;
}
-// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 {
+// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
// CHECK: ret i32 0
transparent_u func_transparent_union_ret()
{
diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
new file mode 100644
index 00000000000..76ace5dca21
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+
+kernel void ker() {};
+// CHECK: define{{.*}}@ker() #0
+
+void foo() {};
+// CHECK: define{{.*}}@foo() #1
+
+// CHECK-LABEL: attributes #0
+// CHECK-UNIFORM: "uniform-work-group-size"="true"
+// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
+
+// CHECK-LABEL: attributes #1
+// CHECK-NOT: uniform-work-group-size
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index 285b637ca68..a011920761f 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -127,7 +127,7 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
// CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #4
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
kernel void assume_convergent_asm()
{
__asm__ volatile("s_barrier");
@@ -138,4 +138,5 @@ kernel void assume_convergent_asm()
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl
index d68d424b6e3..8c421beeefe 100644
--- a/clang/test/Driver/opencl.cl
+++ b/clang/test/Driver/opencl.cl
@@ -13,6 +13,7 @@
// RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
// RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
+// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
// RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
// RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
@@ -31,6 +32,7 @@
// CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"
// CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero"
// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
+// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"
// CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
// CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'
OpenPOWER on IntegriCloud