diff options
Diffstat (limited to 'clang/test')
| -rw-r--r-- | clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 177 | ||||
| -rw-r--r-- | clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl | 31 | 
2 files changed, 66 insertions, 142 deletions
| diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index e064a28ba0f..d74a1dfbd48 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,6 +1,5 @@  // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32  // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 -// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES  #pragma OPENCL EXTENSION cl_khr_subgroups : enable @@ -47,31 +46,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]    clk_event_t event_wait_list2[] = {clk_event}; -  // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4 - -  // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] -  // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] -  // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] -  // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] -  // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] -  // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32] -  // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] -  // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32] -  // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] -  // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] -    // Emits block literal on stack and block kernel [[INVLK1]]. +  // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags    // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* @@ -97,6 +73,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) +    enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,                   ^(void) {                     a[i] = b[i]; @@ -105,46 +82,39 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES1]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES1]], i32 0, i32 0 -  // B32: store i32 256, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i32 0, i32 0 -  // B64: store i64 256, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 256, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 256, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(    // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange,                   ^(local void *p) {                     return;                   },                   256); -    char c;    // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES2]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES2]], i32 0, i32 0 -  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i32 0, i32 0 -  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(    // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange,                   ^(local void *p) {                     return; @@ -157,21 +127,18 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0    // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*    // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES3]], i32 0, i32 0 -  // B32: store i32 256, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i32 0, i32 0 -  // B64: store i64 256, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 256, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 256, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,                   ^(local void *p) {                     return; @@ -184,21 +151,18 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0    // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)*    // COMMON: [[EVNT:%[0-9]+]]  = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES4]], i32 0, i32 0 -  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i32 0, i32 0 -  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,                   ^(local void *p) {                     return; @@ -209,21 +173,18 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES5]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES5]], i32 0, i32 0 -  // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i32 0, i32 0 -  // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange,                   ^(local void *p) {                     return; @@ -233,29 +194,26 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* %[[BLOCK_SIZES6]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 0 -  // B32: store i32 1, i32* %[[TMP]], align 4 -  // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 1 -  // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4 -  // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 2 -  // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 0 -  // B64: store i64 1, i64* %[[TMP]], align 8 -  // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 1 -  // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8 -  // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 2 -  // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8 +  // B32: %[[TMP:.*]] = alloca [3 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 1, i32* %[[TMP1]], align 4 +  // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 1 +  // B32: store i32 2, i32* %[[TMP2]], align 4 +  // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 2 +  // B32: store i32 4, i32* %[[TMP3]], align 4 +  // B64: %[[TMP:.*]] = alloca [3 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 1, i64* %[[TMP1]], align 8 +  // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 1 +  // B64: store i64 2, i64* %[[TMP2]], align 8 +  // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2 +  // B64: store i64 4, i64* %[[TMP3]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange,                   ^(local void *p1, local void *p2, local void *p3) {                     return; @@ -265,21 +223,18 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {    // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].    // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue    // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags -  // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES7]] to i8* -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i64 0, i64 0 -  // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( -  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) -  // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES7]], i32 0, i32 0 -  // B32: store i32 0, i32* %[[TMP]], align 4 -  // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i32 0, i32 0 -  // B64: store i64 4294967296, i64* %[[TMP]], align 8 +  // B32: %[[TMP:.*]] = alloca [1 x i32] +  // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 +  // B32: store i32 0, i32* %[[TMP1]], align 4 +  // B64: %[[TMP:.*]] = alloca [1 x i64] +  // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 +  // B64: store i64 4294967296, i64* %[[TMP1]], align 8    // COMMON-LABEL: call i32 @__enqueue_kernel_varargs    // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),    // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, -  // B32-SAME: i32* %[[TMP]]) -  // B64-SAME: i64* %[[TMP]]) +  // B32-SAME: i32* %[[TMP1]]) +  // B64-SAME: i64* %[[TMP1]])    enqueue_kernel(default_queue, flags, ndrange,                   ^(local void *p) {                     return; diff --git a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl deleted file mode 100644 index 00c4755c1cc..00000000000 --- a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=COMMON,AMDGPU -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32 -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64 -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG - -// Check that the enqueue_kernel array temporary is in the entry block to avoid -// a dynamic alloca - -typedef struct {int a;} ndrange_t; - -kernel void test(int i) { -// COMMON-LABEL: define {{.*}} void @test -// COMMON-LABEL: entry: -// AMDGPU: %block_sizes = alloca [1 x i64] -// SPIR32: %block_sizes = alloca [1 x i32] -// SPIR64: %block_sizes = alloca [1 x i64] -// COMMON-LABEL: if.then: -// COMMON-NOT: alloca -// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34 -// COMMON-LABEL: if.end -  queue_t default_queue; -  unsigned flags = 0; -  ndrange_t ndrange; -  if (i) -    enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { }, 32); -} - -// Check that the temporary is scoped to the `if` - -// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1, line: 24) -// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32) | 

