summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-11-21 19:41:10 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-11-21 19:41:10 +0000
commit92b33652f6b208cc8e5a1a39472f2549a45de76a (patch)
tree50fe70935073c843df5370cdc109b725f42aeed4
parentda6bc702d07ce9aa7dc43f6a29fd6f23cbfb6857 (diff)
downloadbcm5719-llvm-92b33652f6b208cc8e5a1a39472f2549a45de76a.tar.gz
bcm5719-llvm-92b33652f6b208cc8e5a1a39472f2549a45de76a.zip
[OPENMP]Fix handling of the LCVs in loop-based directives.
Loop-control variables with the default data-sharing attributes should not be captured in the OpenMP region as they are private by default. Also, default attributes should be emitted for such variables in the inner OpenMP regions for the correct data sharing during codegen. llvm-svn: 347409
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp10
-rw-r--r--clang/lib/Sema/TreeTransform.h3
-rw-r--r--clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp90
3 files changed, 101 insertions, 2 deletions
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index a2a935944c1..84c3023081a 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1275,10 +1275,16 @@ bool DSAStackTy::hasExplicitDSA(
return false;
std::advance(StartI, Level);
auto I = StartI->SharingMap.find(D);
- return (I != StartI->SharingMap.end()) &&
+ if ((I != StartI->SharingMap.end()) &&
I->getSecond().RefExpr.getPointer() &&
CPred(I->getSecond().Attributes) &&
- (!NotLastprivate || !I->getSecond().RefExpr.getInt());
+ (!NotLastprivate || !I->getSecond().RefExpr.getInt()))
+ return true;
+ // Check predetermined rules for the loop control variables.
+ auto LI = StartI->LCVMap.find(D);
+ if (LI != StartI->LCVMap.end())
+ return CPred(OMPC_private);
+ return false;
}
bool DSAStackTy::hasExplicitDirective(
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index e99d75cdcf4..4011ec251cf 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -6770,6 +6770,9 @@ TreeTransform<Derived>::TransformDoStmt(DoStmt *S) {
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
+ if (getSema().getLangOpts().OpenMP)
+ getSema().startOpenMPLoop();
+
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
new file mode 100644
index 00000000000..ba99d17bdc2
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
@@ -0,0 +1,90 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: [[MEM_TY:%.+]] = type { [4 x i8] }
+// CHECK-DAG: {{@__omp_offloading_.+}}_l19_exec_mode = weak constant i8 1
+// CHECK-DAG: internal unnamed_addr constant i{{64|32}} 4
+
+template<typename tx>
+tx ftemplate(int n) {
+ int i;
+
+ #pragma omp target teams distribute
+ for (i = 0; i < 10; ++i)
+ {
+#pragma omp parallel
+ ++i;
+ }
+
+ return i;
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<char>(n);
+
+ return a;
+}
+
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l19}}_worker()
+ // CHECK: ret void
+
+ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l19}}()
+
+ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
+ // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
+ // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
+ //
+ // CHECK: [[WORKER]]
+ // CHECK: {{call|invoke}} void {{@__omp_offloading_.+template.+l19}}_worker()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[CHECK_MASTER]]
+ // CHECK-DAG: [[CMTID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK-DAG: [[CMNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[CMWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[IS_MASTER:%.+]] = icmp eq i32 [[CMTID]],
+ // CHECK: br i1 [[IS_MASTER]], label {{%?}}[[MASTER:.+]], label {{%?}}[[EXIT]]
+ //
+ // CHECK: [[MASTER]]
+ // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
+ // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
+ // CHECK: call void @__kmpc_get_team_static_memory(i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* @{{.+}}, i32 0, i32 0, i32 0) to i8*), i{{64|32}} 4, i16 1, i8** addrspacecast (i8* addrspace(3)* [[BUF:@.+]] to i8**))
+ // CHECK: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[BUF]],
+ // CHECK: [[RD:%.+]] = bitcast i8* [[PTR]] to [[GLOB_TY:%.+]]*
+ // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0
+ //
+ // CHECK: call void @__kmpc_for_static_init_4(
+ // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1)
+ // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1)
+ // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]],
+ // CHECK: [[I_ADDR_BC:%.+]] = bitcast i32* [[I_ADDR]] to i8*
+ // CHECK: store i8* [[I_ADDR_BC]], i8** [[SHARED_VARS_BUF]],
+ // CHECK: call void @llvm.nvvm.barrier0()
+ // CHECK: call void @llvm.nvvm.barrier0()
+ // CHECK: call void @__kmpc_end_sharing_variables()
+ // CHECK: call void @__kmpc_for_static_fini(
+ // CHECK: br label {{%?}}[[TERMINATE:.+]]
+ //
+ // CHECK: [[TERMINATE]]
+ // CHECK: call void @__kmpc_kernel_deinit(
+ // CHECK: call void @llvm.nvvm.barrier0()
+ // CHECK: br label {{%?}}[[EXIT]]
+ //
+ // CHECK: [[EXIT]]
+ // CHECK: ret void
+
+#endif
OpenPOWER on IntegriCloud