diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2018-10-09 14:49:00 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-10-09 14:49:00 +0000 |
commit | 9ea3c385973c395c4798145f71ff3ae34ccaabeb (patch) | |
tree | 0a2f511ca7fb094143065d6ee32b2976e7079a80 /clang/test/OpenMP | |
parent | f5fac1826a867d5e65310456670cc1d5a495ed02 (diff) | |
download | bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.tar.gz bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.zip |
[OPENMP][NVPTX] Support memory coalescing for globalized variables.
Added support for memory coalescing for better performance for
globalized variables. From now on all the globalized variables are
represented as arrays of 32 elements and each thread accesses these
elements using `tid & 31` as index.
llvm-svn: 344049
Diffstat (limited to 'clang/test/OpenMP')
-rw-r--r-- | clang/test/OpenMP/declare_target_codegen_globalization.cpp | 13 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_data_sharing.cpp | 12 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp | 7 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_parallel_codegen.cpp | 7 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_target_codegen.cpp | 15 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_teams_codegen.cpp | 28 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp | 16 |
7 files changed, 66 insertions, 32 deletions
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index 6b08993c194..8d2a4da7737 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -35,16 +35,21 @@ int maini1() { // CHECK-NOT: @__kmpc_data_sharing_push_stack // CHECK: define {{.*}}[[BAR]]() -// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]], +// CHECK: alloca i32, +// CHECK: [[A_LOCAL_ADDR:%.+]] = alloca i32, // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 // CHECK: br i1 [[IS_SPMD]], label // CHECK: br label -// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) -// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST]]* +// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 128, i16 0) +// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%.+]]* // CHECK: br label -// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[GLOBALS]], {{.+}} ] +// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[GLOBALS]], {{.+}} ] // CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: [[LID:%.+]] = and i32 [[TID]], 31 +// CHECK: [[A_GLOBAL_ADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A_ADDR]], i32 0, i32 [[LID]] +// CHECK: [[A_ADDR:%.+]] = select i1 [[IS_SPMD]], i32* [[A_LOCAL_ADDR]], i32* [[A_GLOBAL_ADDR]] // CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]]) // CHECK: br i1 [[IS_SPMD]], label // CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8* diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index 0acb1199152..83206769243 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -39,10 +39,16 @@ void test_ds(){ // CK1: [[SHAREDARGS2:%.+]] = alloca i8** // CK1: call void @__kmpc_kernel_init // CK1: call void @__kmpc_data_sharing_init_stack -// CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 0) +// CK1: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 256, i16 0) // CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty* -// CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 -// CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1 +// CK1: [[A_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[A:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A_ARR]], i32 0, i32 [[LID]] +// CK1: [[B_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[B:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[B_ARR]], i32 0, i32 [[LID]] // CK1: store i32 10, i32* [[A]] // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1) diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 2b18f6d3f9b..2a7344f7734 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -26,10 +26,13 @@ int main(int argc, char **argv) { // CHECK-LABEL: define internal void @__omp_offloading_{{.*}}_main_l17_worker( // CHECK: define weak void @__omp_offloading_{{.*}}_main_l17([10 x i32]* dereferenceable(40) %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i{{64|32}} %{{.+}}, [10 x i32]* dereferenceable(40) %{{.+}}) -// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 84, i16 0) +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 2688, i16 0) // CHECK: [[STACK:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* // CHECK: [[ARGC:%.+]] = load i32, i32* %{{.+}}, align -// CHECK: [[ARGC_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[ARGC_ARR_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0 +// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: [[LID:%.+]] = and i32 [[TID]], 31 +// CHECK: [[ARGC_ADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGC_ARR_ADDR]], i32 0, i32 [[LID]] // CHECK: store i32 [[ARGC]], i32* [[ARGC_ADDR]], // CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 2 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index d1a3104407d..978804d2495 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -318,11 +318,14 @@ int bar(int n){ // CHECK-32: [[A_ADDR:%.+]] = alloca i32, // CHECK-64: [[A_ADDR:%.+]] = alloca i64, // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) +// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0) // CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty* // CHECK-32: [[A:%.+]] = load i32, i32* [[A_ADDR]], // CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]], -// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[GLOBAL_A_ADDR_ARR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: [[LID:%.+]] = and i32 [[TID]], 31 +// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[GLOBAL_A_ADDR_ARR]], i32 0, i32 [[LID]] // CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]], // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}}) diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 01f346121a4..69c338995b7 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -554,7 +554,8 @@ int baz(int f, double &a) { // CHECK: ret void // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}}) - // CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]], + // CHECK: alloca i32, + // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32, // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* // CHECK: store i32 0, i32* [[ZERO_ADDR]] @@ -562,11 +563,15 @@ int baz(int f, double &a) { // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 // CHECK: br i1 [[IS_SPMD]], label // CHECK: br label - // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) - // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]* + // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0) + // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]* // CHECK: br label - // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ] - // CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0 + // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ] + // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0 + // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + // CHECK: [[LID:%.+]] = and i32 [[TID]], 31 + // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]] + // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]] // CHECK: store i32 %{{.+}}, i32* [[F_PTR]], // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index 4e3f2674fc5..91b372c65b6 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -36,10 +36,13 @@ int main (int argc, char **argv) { // CK1: store {{.+}} 0, {{.+}}, // CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]], // CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}* -// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0) // CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] // CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]] -// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]], // CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]], // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]], @@ -53,9 +56,12 @@ int main (int argc, char **argv) { // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***, // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**, // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]] -// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0) +// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0) // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] -// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]], // CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]], // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]], @@ -111,10 +117,13 @@ int main (int argc, char **argv) { // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32* // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32* // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32* -// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0) // CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] // CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]] -// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK2: [[LID:%.+]] = and i32 [[TID]], 31 +// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]], // CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]], @@ -132,9 +141,12 @@ int main (int argc, char **argv) { // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]], -// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0) +// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0) // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] -// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK2: [[LID:%.+]] = and i32 [[TID]], 31 +// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]], // CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]], diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index 6925b17163a..b73b5a8e93e 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -597,9 +597,9 @@ int bar(int n){ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1 // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128 // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128 // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], @@ -643,9 +643,9 @@ int bar(int n){ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1 // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128 // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128 // // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] @@ -1024,9 +1024,9 @@ int bar(int n){ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4 // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128 // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128 // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], @@ -1072,9 +1072,9 @@ int bar(int n){ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4 // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128 // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128 // // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] |