diff options
4 files changed, 49 insertions, 26 deletions
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b34bb3388d7..0880b2d7906 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1290,9 +1290,14 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) { } if (IsByRef && Ty.getNonReferenceType()->isScalarType()) { - IsByRef = !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; }, - Level, /*NotLastprivate=*/true); + IsByRef = + !DSAStack->hasExplicitDSA( + D, + [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; }, + Level, /*NotLastprivate=*/true) && + // If the variable is artificial and must be captured by value - try to + // capture by value. + !(isa<OMPCapturedExprDecl>(D) && D->hasAttr<OMPCaptureKindAttr>()); } // When passing data by copy, we need to make sure it fits the uintptr size @@ -2321,10 +2326,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, ASTContext &C = S.getASTContext(); Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts(); QualType Ty = Init->getType(); + Attr *OMPCaptureKind = nullptr; if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) { - if (S.getLangOpts().CPlusPlus) + if (S.getLangOpts().CPlusPlus) { Ty = C.getLValueReferenceType(Ty); - else { + } else { Ty = C.getPointerType(Ty); ExprResult Res = S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_AddrOf, Init); @@ -2333,11 +2339,16 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, Init = Res.get(); } WithInit = true; + } else if (AsExpression) { + // This variable must be captured by value. + OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown); } auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty, CaptureExpr->getLocStart()); if (!WithInit) CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange())); + if (OMPCaptureKind) + CED->addAttr(OMPCaptureKind); S.CurContext->addHiddenDecl(CED); S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false); return CED; @@ -2346,31 +2357,34 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr, bool WithInit) { OMPCapturedExprDecl *CD; - if (auto *VD = S.IsOpenMPCapturedDecl(D)) + if (auto *VD = S.IsOpenMPCapturedDecl(D)) { CD = cast<OMPCapturedExprDecl>(VD); - else + } else { CD = buildCaptureDecl(S, D->getIdentifier(), CaptureExpr, WithInit, /*AsExpression=*/false); + } return buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(), CaptureExpr->getExprLoc()); } static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) { + CaptureExpr = S.DefaultLvalueConversion(CaptureExpr).get(); if (!Ref) { - auto *CD = - buildCaptureDecl(S, &S.getASTContext().Idents.get(".capture_expr."), - CaptureExpr, /*WithInit=*/true, /*AsExpression=*/true); + OMPCapturedExprDecl *CD = buildCaptureDecl( + S, &S.getASTContext().Idents.get(".capture_expr."), CaptureExpr, + /*WithInit=*/true, /*AsExpression=*/true); Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(), CaptureExpr->getExprLoc()); } ExprResult Res = Ref; if (!S.getLangOpts().CPlusPlus && CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue() && - Ref->getType()->isPointerType()) + Ref->getType()->isPointerType()) { Res = S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_Deref, Ref); - if (!Res.isUsable()) - return ExprError(); - return CaptureExpr->isGLValue() ? Res : S.DefaultLvalueConversion(Res.get()); + if (!Res.isUsable()) + return ExprError(); + } + return S.DefaultLvalueConversion(Res.get()); } namespace { @@ -8117,12 +8131,13 @@ OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier, if (Val.isInvalid()) return nullptr; - ValExpr = MakeFullExpr(Val.get()).get(); + ValExpr = Val.get(); OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_if, NameModifier); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -8239,6 +8254,7 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads, OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -8666,6 +8682,7 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( DSAStack->getCurrentDirective(), OMPC_schedule) != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -11355,6 +11372,7 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc, OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_device); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -12378,6 +12396,7 @@ OMPClause *Sema::ActOnOpenMPNumTeamsClause(Expr *NumTeams, OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_teams); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -12404,6 +12423,7 @@ OMPClause *Sema::ActOnOpenMPThreadLimitClause(Expr *ThreadLimit, OpenMPDirectiveKind CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_thread_limit); if (CaptureRegion != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); @@ -12514,6 +12534,7 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( DSAStack->getCurrentDirective(), OMPC_dist_schedule) != OMPD_unknown && !CurContext->isDependentContext()) { + ValExpr = MakeFullExpr(ValExpr).get(); llvm::MapVector<Expr *, DeclRefExpr *> Captures; ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); HelperValStmt = buildPreInits(Context, Captures); diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 1773619bce2..db3a2de580d 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -15,10 +15,12 @@ void with_var_schedule() { double a = 5; // CHECK: [[CHUNK_SIZE:%.+]] = fptosi double %{{.+}}to i8 // CHECK: store i8 %{{.+}}, i8* [[CHUNK:%.+]], -// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i8* [[CHUNK]]) +// CHECK: [[VAL:%.+]] = load i8, i8* [[CHUNK]], +// CHECK: store i8 [[VAL]], i8* +// CHECK: [[CHUNK:%.+]] = load i64, i64* % +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, i64 [[CHUNK]]) -// CHECK: [[CHUNK:%.+]] = load i8*, i8** % -// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]], +// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* % // CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64 // CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]]) // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 7c0ac461220..304cc8b3f4a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -60,8 +60,8 @@ int main() { #pragma omp teams distribute parallel for num_threads(a) for (int i = 0; i < 100; i++) { // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( - // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, - // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], + // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64, + // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8* // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) @@ -110,9 +110,9 @@ int main() { // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) // CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) -// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*, +// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64, // CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], -// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]], +// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* // CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], // CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 93051af6161..2576241725c 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -60,8 +60,8 @@ int main() { #pragma omp teams distribute parallel for simd num_threads(a) for (int i = 0; i < 100; i++) { // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( - // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, - // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], + // CHECK-DAG: [[A_ADDR:%.+]] = alloca i64, + // CHECK-DAG: [[A_REF:%.+]] = bitcast i64* [[A_ADDR]] to i8* // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) @@ -110,9 +110,9 @@ int main() { // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) // CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) -// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*, +// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64, // CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], -// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]], +// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* // CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], // CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) |