diff options
Diffstat (limited to 'clang/test')
9 files changed, 399 insertions, 53 deletions
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index 7ef4f8af3de..6b08993c194 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -35,9 +35,19 @@ int maini1() { // CHECK-NOT: @__kmpc_data_sharing_push_stack // CHECK: define {{.*}}[[BAR]]() +// CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]], +// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() +// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 +// CHECK: br i1 [[IS_SPMD]], label +// CHECK: br label // CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) -// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]* -// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST]]* +// CHECK: br label +// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[GLOBALS]], {{.+}} ] +// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]]) -// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) +// CHECK: br i1 [[IS_SPMD]], label +// CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8* +// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]]) +// CHECK: br label // CHECK: ret i32 diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp new file mode 100644 index 00000000000..615dc30bdc7 --- /dev/null +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -0,0 +1,328 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 +// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1 +// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 + +void foo() { +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +int a; +// CHECK: call void @__kmpc_kernel_init( +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams distribute parallel for lastprivate(a) + for (int i = 0; i < 10; ++i) + a = i; +#pragma omp target teams distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target teams +#pragma omp distribute parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target teams +#pragma omp distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target parallel +#pragma omp for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target parallel +#pragma omp for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp parallel +#pragma omp for simd ordered + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel +#pragma omp for simd schedule(guided) + for (int i = 0; i < 10; ++i) + ; +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +#pragma omp target +#pragma omp parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(static) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(static, 1) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(auto) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(runtime) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < 10; ++i) + ; +#pragma omp target +#pragma omp parallel for schedule(guided) + for (int i = 0; i < 10; ++i) + ; +} + +#endif + diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 5f9b3bd3280..e4049782906 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -554,13 +554,20 @@ int baz(int f, double &a) { // CHECK: ret void // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}}) + // CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]], // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* // CHECK: [[GTID_ADDR:%.+]] = alloca i32, // CHECK: store i32 0, i32* [[ZERO_ADDR]] + // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() + // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 + // CHECK: br i1 [[IS_SPMD]], label + // CHECK: br label // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) - // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* - // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0 + // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]* + // CHECK: br label + // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ] + // CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0 // CHECK: store i32 %{{.+}}, i32* [[F_PTR]], // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() @@ -595,7 +602,12 @@ int baz(int f, double &a) { // CHECK: br label // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]], - // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]]) + // CHECK: store i32 [[RES]], i32* [[RET:%.+]], + // CHECK: br i1 [[IS_SPMD]], label + // CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8* + // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[BC]]) + // CHECK: br label + // CHECK: [[RES:%.+]] = load i32, i32* [[RET]], // CHECK: ret i32 [[RES]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index 15611f64758..6fccfbed562 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -59,7 +59,7 @@ int bar(int n){ // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], + // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -102,7 +102,7 @@ int bar(int n){ // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], + // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 1cdcb70417b..163679d92b6 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -47,7 +47,7 @@ int bar(int n){ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}( - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -69,7 +69,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}( - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // @@ -90,7 +90,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}( - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index 02676272b0f..a3790f2851f 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -54,7 +54,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( // - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // @@ -242,7 +242,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( // - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // @@ -520,7 +520,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( // - // CHECK: call void @__kmpc_spmd_kernel_init( + // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index c7667d83b9a..5d3088d019d 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -227,13 +227,13 @@ int bar(int n){ // CHECK: ret void // CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37( -// CHECK: call void @__kmpc_spmd_kernel_init( +// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 1) // CHECK: call void @__kmpc_data_sharing_init_stack_spmd -// CHECK: call i8* @__kmpc_data_sharing_push_stack( +// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( // CHECK-NOT: call void @__kmpc_serialized_parallel( // CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}}) // CHECK-NOT: call void @__kmpc_end_serialized_parallel( -// CHECK: call void @__kmpc_data_sharing_pop_stack( +// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack( // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index f74919fc828..bbcb19d77d0 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -8,12 +8,13 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0 +// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1 +// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -67,14 +68,14 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker() + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33( +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1) // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK: define internal void [[OUTL1]]( @@ -84,8 +85,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: {{call|invoke}} void [[OUTL2:@.+]]( // CHECK: call void @__kmpc_for_static_fini( @@ -99,8 +99,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: {{call|invoke}} void [[OUTL3:@.+]]( // CHECK: call void @__kmpc_for_static_fini( @@ -115,8 +114,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: {{call|invoke}} void [[OUTL4:@.+]]( @@ -129,7 +127,7 @@ int bar(int n){ // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void -// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) +// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}}) // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}}) // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}}) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 0b945a69b05..86768b04ae5 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -8,11 +8,12 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 +// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1 +// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0 #define N 1000 #define M 10 @@ -62,14 +63,14 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( -// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker() + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31( +// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1) // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK: define internal void [[OUTL1]]( @@ -79,8 +80,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: {{call|invoke}} void [[OUTL2:@.+]]( // CHECK: call void @__kmpc_for_static_fini( @@ -94,8 +94,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, // CHECK: {{call|invoke}} void [[OUTL3:@.+]]( // CHECK: call void @__kmpc_for_static_fini( @@ -110,8 +109,7 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], // CHECK: {{call|invoke}} void [[OUTL4:@.+]]( |