diff options
| author | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-01-09 20:38:35 +0000 |
|---|---|---|
| committer | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-01-09 20:38:35 +0000 |
| commit | a3afcf24453123abe7c1ba3d984b19cd64b206a7 (patch) | |
| tree | 4c8968b45ab81f0718eedca561e0c3c0b0d857c1 /clang/test | |
| parent | 2819d4d19c88b0ec9f1d76dd3beec0aea2076a44 (diff) | |
| download | bcm5719-llvm-a3afcf24453123abe7c1ba3d984b19cd64b206a7.tar.gz bcm5719-llvm-a3afcf24453123abe7c1ba3d984b19cd64b206a7.zip | |
[OpenMP] Add flag for preventing the extension to 64 bits for the collapse loop counter
Summary: Introduce a compiler flag for cases when the user knows that the collapsed loop counter can be safely represented using at most 32 bits. This will prevent the emission of expensive mathematical operations (such as the div operation) on the iteration variable using 64 bits where 32 bit operations are sufficient.
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: hfinkel, kkwli0, guansong, cfe-commits
Differential Revision: https://reviews.llvm.org/D55928
llvm-svn: 350758
Diffstat (limited to 'clang/test')
| -rw-r--r-- | clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index c4df348329f..fe0c8dfb63f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -1,6 +1,7 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix CHECK-DIV64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-DIV32 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 @@ -9,11 +10,12 @@ #define HEADER // Check that the execution mode of all 5 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l45}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l50}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l58}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l65}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -80,7 +82,7 @@ int bar(int n){ // CHECK-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4 // CHECK-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1 -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32( +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l34( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**)) @@ -214,16 +216,19 @@ int bar(int n){ // CHECK: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK: ret void -// CHECK: define internal void [[OUTL4]]( +// CHECK-32: define internal void [[OUTL4]]( +// CHECK-64: define internal void [[OUTL4]]( // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void -// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{.*}}) +// CHECK: define weak void @__omp_offloading_{{.*}}_l58(i[[SZ:64|32]] %{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{.*}}) // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [10 x [10 x i32]]* %{{.*}}) // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x [10 x i32]]* dereferenceable{{.*}}) +// CHECK-DIV64: div i64 +// CHECK-DIV32-NO: div i64 -// CHECK: define weak void @__omp_offloading_{{.*}}_l63(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) +// CHECK: define weak void @__omp_offloading_{{.*}}_l65(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}}) // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}}) |

