summary refs log tree commit diff stats
path: root/clang/test/OpenMP/nvptx_target_codegen.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/test/OpenMP/nvptx_target_codegen.cpp')
-rw-r--r-- clang/test/OpenMP/nvptx_target_codegen.cpp 372
1 files changed, 211 insertions, 161 deletions
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 287089d7c45..100f79bc03f 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -8,9 +8,6 @@
#ifndef HEADER
#define HEADER
-// CHECK-DAG: [[OMP_NT:@.+]] = common addrspace(3) global i32 0
-// CHECK-DAG: [[OMP_WID:@.+]] = common addrspace(3) global i64 0
-
template<typename tx, typename ty>
struct TT{
tx X;
@@ -26,19 +23,22 @@ int foo(int n) {
double cn[5][n];
TT<long long, char> d;
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l87}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l93}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -54,31 +54,37 @@ int foo(int n) {
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l87]]()
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l93]]()
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
// CHECK: {{call|invoke}} void [[T1]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
- // CHECK: br label {{%?}}[[TERM:.+]]
- //
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
+ //
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
@@ -93,19 +99,22 @@ int foo(int n) {
{
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l158}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l173}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -121,35 +130,41 @@ int foo(int n) {
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l158]](i[[SZ:32|64]] [[ARG1:%[^)]+]])
+ // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l173]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]])
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]],
// CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]],
// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void [[T3]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: {{call|invoke}} void [[T2]]_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: load i16, i16* [[AA_CADDR]],
- // CHECK: br label {{%?}}[[TERM:.+]]
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
//
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
@@ -160,19 +175,22 @@ int foo(int n) {
aa += 1;
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l261}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l285}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -188,7 +206,7 @@ int foo(int n) {
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+foo.+l261]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l285]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]*
@@ -219,26 +237,32 @@ int foo(int n) {
// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
//
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void [[T4]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: {{call|invoke}} void [[T3]]_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// Use captures.
// CHECK-64-DAG: load i32, i32* [[REF_A]]
@@ -249,10 +273,10 @@ int foo(int n) {
// CHECK-DAG: getelementptr inbounds double, double* [[REF_CN]], i[[SZ]] %{{.+}}
// CHECK-DAG: getelementptr inbounds [[TT]], [[TT]]* [[REF_D]], i32 0, i32 0
//
- // CHECK: br label {{%?}}[[TERM:.+]]
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
//
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
@@ -338,19 +362,22 @@ int bar(int n){
return a;
}
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+l298}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+l322}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -366,7 +393,7 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+static.+l298]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l322]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
@@ -382,36 +409,40 @@ int bar(int n){
// CHECK-DAG: [[REF_AAA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
//
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void [[T5]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: {{call|invoke}} void [[T4]]_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
- //
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK-64-DAG: load i32, i32* [[REF_A]]
// CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
// CHECK-DAG: load i16, i16* [[REF_AA]]
// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
//
- // CHECK: br label {{%?}}[[TERM:.+]]
- //
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
@@ -420,19 +451,22 @@ int bar(int n){
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l316}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l340}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -448,7 +482,7 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+S1.+l316]](
+ // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l340]](
// Create local storage for each capture.
// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]*
// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]]
@@ -466,35 +500,42 @@ int bar(int n){
// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ //
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void [[T6]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: {{call|invoke}} void [[T5]]_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// Use captures.
// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0
// CHECK-64-DAG:load i32, i32* [[REF_B]]
// CHECK-32-DAG:load i32, i32* [[LOCAL_B]]
// CHECK-DAG: getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}}
- // CHECK: br label {{%?}}[[TERM:.+]]
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
//
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
@@ -503,19 +544,22 @@ int bar(int n){
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l281}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l305}}_worker()
+ // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
+ // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
+ // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
+ // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]],
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @llvm.nvvm.barrier0()
- // CHECK: [[WORK:%.+]] = load i64, i64 addrspace(3)* [[OMP_WID]],
- // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i64 [[WORK]], 0
+ // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]],
+ // CHECK: [[SHOULD_EXIT:%.+]] = icmp eq i8* [[WORK]], null
// CHECK: br i1 [[SHOULD_EXIT]], label {{%?}}[[EXIT:.+]], label {{%?}}[[SEL_WORKERS:.+]]
//
// CHECK: [[SEL_WORKERS]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[NT:%.+]] = load i32, i32 addrspace(3)* [[OMP_NT]]
- // CHECK: [[IS_ACTIVE:%.+]] = icmp slt i32 [[TID]], [[NT]]
+ // CHECK: [[ST:%.+]] = load i8, i8* [[OMP_EXEC_STATUS]],
+ // CHECK: [[IS_ACTIVE:%.+]] = icmp ne i8 [[ST]], 0
// CHECK: br i1 [[IS_ACTIVE]], label {{%?}}[[EXEC_PARALLEL:.+]], label {{%?}}[[BAR_PARALLEL:.+]]
//
// CHECK: [[EXEC_PARALLEL]]
@@ -531,7 +575,7 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T7:@__omp_offloading_.+template.+l281]](i[[SZ]]
+ // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l305]](i[[SZ]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
@@ -544,36 +588,42 @@ int bar(int n){
// CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
//
- // CHECK: [[NTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[A:%.+]] = sub i32 [[WS]], 1
- // CHECK: [[B:%.+]] = sub i32 [[NTID]], 1
- // CHECK: [[MID:%.+]] = and i32 [[B]],
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: [[EXCESS:%.+]] = icmp ugt i32 [[TID]], [[MID]]
- // CHECK: br i1 [[EXCESS]], label {{%?}}[[EXIT:.+]], label {{%?}}[[CHECK_WORKER:.+]]
- //
- // CHECK: [[CHECK_WORKER]]
- // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[MID]]
- // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[MASTER:.+]]
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[WORKER]]
- // CHECK: {{call|invoke}} void [[T7]]_worker()
- // CHECK: br label {{%?}}[[EXIT]]
+ // CHECK: {{call|invoke}} void [[T6]]_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[CMTMP1:%.+]] = sub i32 [[CMWS]], 1
+ // CHECK: [[CMTMP2:%.+]] = sub i32 [[CMNTH]], 1
+ // CHECK: [[MID:%.+]] = and i32 [[CMTMP2]],
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]], [[MID]]
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
//
// CHECK: [[MASTER]]
- // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- // CHECK: call void @__kmpc_kernel_init(i32 0, i32 [[TID]])
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-64-DAG: load i32, i32* [[REF_A]]
// CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
// CHECK-DAG: load i16, i16* [[REF_AA]]
// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2
//
- // CHECK: br label {{%?}}[[TERM:.+]]
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
//
- // CHECK: [[TERM]]
- // CHECK: store i64 0, i64 addrspace(3)* [[OMP_WID]],
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit()
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK: br label {{%?}}[[EXIT]]
//
OpenPOWER on IntegriCloud