diff options
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp        |  84
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.h          |   6
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp   |  53
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h     |   5
-rw-r--r--  clang/lib/CodeGen/CGStmtOpenMP.cpp           |  14
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp                |  58
-rw-r--r--  clang/test/OpenMP/nvptx_lambda_capturing.cpp | 132
7 files changed, 348 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 47828d189c8..a15918d4fbb 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7532,6 +7532,76 @@ public: } } + /// Emit capture info for lambdas for variables captured by reference. + void generateInfoForLambdaCaptures(const ValueDecl *VD, llvm::Value *Arg, + MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + return; + Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + BasePointers.push_back(VDLVal.getPointer()); + Pointers.push_back(ThisLVal.getPointer()); + Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + BasePointers.push_back(VDLVal.getPointer()); + Pointers.push_back(VarLVal.getPointer()); + Sizes.push_back(CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType())); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + } + + /// Set correct indices 
for lambdas captures. + void adjustMemberOfForLambdaCaptures(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapFlagsArrayTy &Types) const { + for (unsigned I = 0, E = Types.size(); I < E; ++I) { + // Set correct member_of idx for all implicit lambda captures. + if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + continue; + llvm::Value *BasePtr = *BasePointers[I]; + int TgtIdx = -1; + for (unsigned J = I; J > 0; --J) { + unsigned Idx = J - 1; + if (Pointers[Idx] != BasePtr) + continue; + TgtIdx = Idx; + break; + } + assert(TgtIdx != -1 && "Unable to find parent lambda."); + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); + setCorrectMemberOfFlag(Types[I], MemberOfFlag); + } + } + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, @@ -8133,6 +8203,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurBasePointers, CurPointers, + CurSizes, CurMapTypes); } // We expect to have at least an element of information for this capture. assert(!CurBasePointers.empty() && @@ -8154,6 +8230,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Sizes.append(CurSizes.begin(), CurSizes.end()); MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } + // Adjust MEMBER_OF flags for the lambdas captures. 
+  MEHandler.adjustMemberOfForLambdaCaptures(BasePointers, Pointers, MapTypes); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, @@ -8465,6 +8543,12 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const { } } +void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 0cc1056ffab..7236b9e2519 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1543,6 +1543,12 @@ public: /// Emit deferred declare target variables marked for deferred emission. void emitDeferredTargetDecls() const; + + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. + virtual void + adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, + const OMPExecutableDirective &D) const; }; /// Class supports emission of SIMD-only code. 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 101ab54d585..e93b73bbccd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4255,3 +4255,56 @@ void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk( CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), SourceLocation()); } + +void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); + const CapturedStmt *CS = D.getCapturedStmt(OMPD_target); + for (const CapturedStmt::Capture &C : CS->captures()) { + // Capture variables captured by reference in lambdas for target-based + // directives. + if (!C.capturesVariable()) + continue; + const VarDecl *VD = C.getCapturedVar(); + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + continue; + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + LValue VDLVal; + if (VD->getType().getCanonicalType()->isReferenceType()) + VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType()); + else + VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + llvm::Value *CXXThis = CGF.LoadCXXThis(); + CGF.EmitStoreOfScalar(CXXThis, ThisLVal); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + if (!CS->capturesVariable(VD)) + continue; + auto It = Captures.find(VD); + assert(It != Captures.end() && 
"Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + if (VD->getType().getCanonicalType()->isReferenceType()) + VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, + VD->getType().getCanonicalType()) + .getAddress(); + CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); + } + } +} + diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 1ce8a175dae..b0b66d86320 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -350,6 +350,11 @@ public: const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override; + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. + void adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const override; + private: /// Track the execution mode when codegening directives within a target /// region. 
The appropriate mode (SPMD/NON-SPMD) is set on entry to the diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c0e56647770..7b6d8356232 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1738,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPReductionClauseInit(S, LoopScope); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { @@ -2296,6 +2298,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitOMPPrivateLoopCounters(S, LoopScope); EmitOMPLinearClause(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the loop schedule kind and chunk. const Expr *ChunkExpr = nullptr; @@ -2589,6 +2593,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // Emit static non-chunked loop. OpenMPScheduleTy ScheduleKind; @@ -3342,6 +3348,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the distribute schedule kind and chunk. 
llvm::Value *Chunk = nullptr; @@ -4071,6 +4079,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } @@ -4151,6 +4161,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; @@ -4709,6 +4721,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // TODO: Add support for clauses. CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index cfb490fb2f4..ac8f740d0de 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -164,6 +164,9 @@ private: OpenMPClauseKind ClauseKindMode = OMPC_unknown; Sema &SemaRef; bool ForceCapturing = false; + /// true if all the variables in the target executable directives must be + /// captured by reference. 
+ bool ForceCaptureByReferenceInTargetExecutable = false; CriticalsWithHintsTy Criticals; using iterator = StackTy::const_reverse_iterator; @@ -195,6 +198,13 @@ public: bool isForceVarCapturing() const { return ForceCapturing; } void setForceVarCapturing(bool V) { ForceCapturing = V; } + void setForceCaptureByReferenceInTargetExecutable(bool V) { + ForceCaptureByReferenceInTargetExecutable = V; + } + bool isForceCaptureByReferenceInTargetExecutable() const { + return ForceCaptureByReferenceInTargetExecutable; + } + void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName, Scope *CurScope, SourceLocation Loc) { if (Stack.empty() || @@ -1435,6 +1445,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level) const { // By default, all the data that has a scalar type is mapped by copy // (except for reduction variables). IsByRef = + (DSAStack->isForceCaptureByReferenceInTargetExecutable() && + !Ty->isAnyPointerType()) || !Ty->isScalarType() || DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar || DSAStack->hasExplicitDSA( @@ -1444,10 +1456,12 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level) const { if (IsByRef && Ty.getNonReferenceType()->isScalarType()) { IsByRef = - !DSAStack->hasExplicitDSA( - D, - [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; }, - Level, /*NotLastprivate=*/true) && + ((DSAStack->isForceCaptureByReferenceInTargetExecutable() && + !Ty->isAnyPointerType()) || + !DSAStack->hasExplicitDSA( + D, + [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; }, + Level, /*NotLastprivate=*/true)) && // If the variable is artificial and must be captured by value - try to // capture by value. !(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() && @@ -1509,6 +1523,42 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D) { return VD; } } + // Capture variables captured by reference in lambdas for target-based + // directives. 
+ if (VD && !DSAStack->isClauseParsingMode()) { + if (const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl()) { + bool SavedForceCaptureByReferenceInTargetExecutable = + DSAStack->isForceCaptureByReferenceInTargetExecutable(); + DSAStack->setForceCaptureByReferenceInTargetExecutable(/*V=*/true); + if (RD->isLambda()) + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() == LCK_ByRef) { + VarDecl *VD = LC.getCapturedVar(); + DeclContext *VDC = VD->getDeclContext(); + if (!VDC->Encloses(CurContext)) + continue; + DSAStackTy::DSAVarData DVarPrivate = + DSAStack->getTopDSA(VD, /*FromParent=*/false); + // Do not capture already captured variables. + if (!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && + DVarPrivate.CKind == OMPC_unknown && + !DSAStack->checkMappableExprComponentListsForDecl( + D, /*CurrentRegionOnly=*/true, + [](OMPClauseMappableExprCommon:: + MappableExprComponentListRef, + OpenMPClauseKind) { return true; })) + MarkVariableReferenced(LC.getLocation(), LC.getCapturedVar()); + } else if (LC.getCaptureKind() == LCK_This) { + CheckCXXThisCapture(LC.getLocation()); + } + } + DSAStack->setForceCaptureByReferenceInTargetExecutable( + SavedForceCaptureByReferenceInTargetExecutable); + } + } if (DSAStack->getCurrentDirective() != OMPD_unknown && (!DSAStack->isClauseParsingMode() || diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp new file mode 100644 index 00000000000..cf40c8b566c --- /dev/null +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -0,0 +1,132 @@ +// REQUIRES: powerpc-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4, i64 4] +// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 544, i64 33, i64 673, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 800] +// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4] +// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 547, i64 544, i64 547, i64 673, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592] +// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] +// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968] +// HOST-DAG: = private 
unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8] +// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968] +// CHECK-DAG: [[S:%.+]] = type { i32 } +// CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* } +// CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* } + +// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker() +// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}}) +// CLASS-NOT: getelementptr +// CLASS: br i1 % +// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker() +// CLASS: br label % +// CLASS: br i1 % +// CLASS: call void @__kmpc_kernel_init( +// CLASS: call void @__kmpc_data_sharing_init_stack() +// CLASS: call void @llvm.memcpy. +// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], +// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 +// CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]], +// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], +// CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]]) +// CLASS: ret void + +// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l65([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}}) +// CLASS-NOT: getelementptr +// CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}}) +// CLASS: ret void + +// CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}}) +// CLASS-NOT: getelementptr +// CLASS: call void @llvm.memcpy. 
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]], +// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0 +// CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]], +// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]], +// CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]]) +// CLASS: ret void + +struct S { + int a = 15; + int foo() { + auto &&L = [&]() { return a; }; +#pragma omp target + L(); +#pragma omp target parallel + L(); + return a; + } +} s; + +// FUN: define internal void @__omp_offloading_{{.+}}_main_l125_worker() +// FUN: define weak void @__omp_offloading_{{.+}}_main_l125(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}}) +// FUN-NOT: getelementptr +// FUN: br i1 % +// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l125_worker() +// FUN: br label % +// FUN: br i1 % +// FUN: call void @__kmpc_kernel_init( +// FUN: call void @__kmpc_data_sharing_init_stack() +// FUN: call void @llvm.memcpy. 
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], +// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 +// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], +// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 +// FUN: store i32* %{{.+}}, i32** [[B_CAP]], +// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 +// FUN: store i32* %{{.+}}, i32** [[C_CAP]], +// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 +// FUN: store i32** %{{.+}}, i32*** [[D_CAP]], +// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 +// FUN: store i32* %{{.+}}, i32** [[A_CAP]], +// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]], +// FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]]) +// FUN: ret void + +// FUN: define weak void @__omp_offloading_{{.+}}_main_l127(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}} i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}) +// FUN-NOT: getelementptr +// FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}}) +// FUN: ret void + +// FUN: define internal void [[PARALLEL:@.+]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}) +// FUN-NOT: getelementptr +// FUN: call void @llvm.memcpy. 
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]], +// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0 +// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]], +// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1 +// FUN: store i32* %{{.+}}, i32** [[B_CAP]], +// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2 +// FUN: store i32* %{{.+}}, i32** [[C_CAP]], +// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3 +// FUN: store i32** %{{.+}}, i32*** [[D_CAP]], +// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4 +// FUN: store i32* %{{.+}}, i32** [[A_CAP]], +// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]], +// FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]]) +// FUN: ret void + +int main(int argc, char **argv) { + int &b = argc; + int &&c = 1; + int *d = &argc; + int a; + auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; }; +#pragma omp target firstprivate(argc) map(to : a) + L(); +#pragma omp target parallel + L(); + return argc + s.foo(); +} + +#endif // HEADER |

