summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp84
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h6
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp53
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h5
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp14
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp58
-rw-r--r--clang/test/OpenMP/nvptx_lambda_capturing.cpp132
7 files changed, 348 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 47828d189c8..a15918d4fbb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7532,6 +7532,76 @@ public:
}
}
+ /// Emit capture info for lambdas for variables captured by reference.
+ void generateInfoForLambdaCaptures(const ValueDecl *VD, llvm::Value *Arg,
+ MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ return;
+ Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
+ LValue VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ BasePointers.push_back(VDLVal.getPointer());
+ Pointers.push_back(ThisLVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ BasePointers.push_back(VDLVal.getPointer());
+ Pointers.push_back(VarLVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(
+ VD->getType().getCanonicalType().getNonReferenceType()));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ }
+
+ /// Set correct indices for lambdas captures.
+ void adjustMemberOfForLambdaCaptures(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapFlagsArrayTy &Types) const {
+ for (unsigned I = 0, E = Types.size(); I < E; ++I) {
+ // Set correct member_of idx for all implicit lambda captures.
+ if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_PRIVATE |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
+ continue;
+ llvm::Value *BasePtr = *BasePointers[I];
+ int TgtIdx = -1;
+ for (unsigned J = I; J > 0; --J) {
+ unsigned Idx = J - 1;
+ if (Pointers[Idx] != BasePtr)
+ continue;
+ TgtIdx = Idx;
+ break;
+ }
+ assert(TgtIdx != -1 && "Unable to find parent lambda.");
+ // All other current entries will be MEMBER_OF the combined entry
+ // (except for PTR_AND_OBJ entries which do not have a placeholder value
+ // 0xFFFF in the MEMBER_OF field).
+ OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
+ setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+ }
+ }
+
/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
@@ -8133,6 +8203,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
if (CurBasePointers.empty())
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
CurPointers, CurSizes, CurMapTypes);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+ CurBasePointers, CurPointers,
+ CurSizes, CurMapTypes);
}
// We expect to have at least an element of information for this capture.
assert(!CurBasePointers.empty() &&
@@ -8154,6 +8230,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
Sizes.append(CurSizes.begin(), CurSizes.end());
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
+ // Adjust MEMBER_OF flags for the lambdas captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(BasePointers, Pointers, MapTypes);
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
@@ -8465,6 +8543,12 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const {
}
}
+void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+ " Expected target-based directive.");
+}
+
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
CodeGenModule &CGM)
: CGM(CGM) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 0cc1056ffab..7236b9e2519 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1543,6 +1543,12 @@ public:
/// Emit deferred declare target variables marked for deferred emission.
void emitDeferredTargetDecls() const;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ virtual void
+ adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) const;
};
/// Class supports emissionof SIMD-only code.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 101ab54d585..e93b73bbccd 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4255,3 +4255,56 @@ void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
SourceLocation());
}
+
+void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+ " Expected target-based directive.");
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
+ for (const CapturedStmt::Capture &C : CS->captures()) {
+ // Capture variables captured by reference in lambdas for target-based
+ // directives.
+ if (!C.capturesVariable())
+ continue;
+ const VarDecl *VD = C.getCapturedVar();
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ continue;
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ LValue VDLVal;
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
+ else
+ VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ llvm::Value *CXXThis = CGF.LoadCXXThis();
+ CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ if (!CS->capturesVariable(VD))
+ continue;
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
+ VD->getType().getCanonicalType())
+ .getAddress();
+ CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
+ }
+ }
+}
+
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 1ce8a175dae..b0b66d86320 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -350,6 +350,11 @@ public:
const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
const Expr *&ChunkExpr) const override;
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ void adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+
private:
/// Track the execution mode when codegening directives within a target
/// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c0e56647770..7b6d8356232 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1738,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPReductionClauseInit(S, LoopScope);
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
[&S](CodeGenFunction &CGF) {
@@ -2296,6 +2298,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
EmitOMPPrivateLoopCounters(S, LoopScope);
EmitOMPLinearClause(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the loop schedule kind and chunk.
const Expr *ChunkExpr = nullptr;
@@ -2589,6 +2593,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// Emit static non-chunked loop.
OpenMPScheduleTy ScheduleKind;
@@ -3342,6 +3348,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the distribute schedule kind and chunk.
llvm::Value *Chunk = nullptr;
@@ -4071,6 +4079,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}
@@ -4151,6 +4161,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
@@ -4709,6 +4721,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// TODO: Add support for clauses.
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index cfb490fb2f4..ac8f740d0de 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -164,6 +164,9 @@ private:
OpenMPClauseKind ClauseKindMode = OMPC_unknown;
Sema &SemaRef;
bool ForceCapturing = false;
+ /// true if all the vaiables in the target executable directives must be
+ /// captured by reference.
+ bool ForceCaptureByReferenceInTargetExecutable = false;
CriticalsWithHintsTy Criticals;
using iterator = StackTy::const_reverse_iterator;
@@ -195,6 +198,13 @@ public:
bool isForceVarCapturing() const { return ForceCapturing; }
void setForceVarCapturing(bool V) { ForceCapturing = V; }
+ void setForceCaptureByReferenceInTargetExecutable(bool V) {
+ ForceCaptureByReferenceInTargetExecutable = V;
+ }
+ bool isForceCaptureByReferenceInTargetExecutable() const {
+ return ForceCaptureByReferenceInTargetExecutable;
+ }
+
void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName,
Scope *CurScope, SourceLocation Loc) {
if (Stack.empty() ||
@@ -1435,6 +1445,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level) const {
// By default, all the data that has a scalar type is mapped by copy
// (except for reduction variables).
IsByRef =
+ (DSAStack->isForceCaptureByReferenceInTargetExecutable() &&
+ !Ty->isAnyPointerType()) ||
!Ty->isScalarType() ||
DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar ||
DSAStack->hasExplicitDSA(
@@ -1444,10 +1456,12 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level) const {
if (IsByRef && Ty.getNonReferenceType()->isScalarType()) {
IsByRef =
- !DSAStack->hasExplicitDSA(
- D,
- [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
- Level, /*NotLastprivate=*/true) &&
+ ((DSAStack->isForceCaptureByReferenceInTargetExecutable() &&
+ !Ty->isAnyPointerType()) ||
+ !DSAStack->hasExplicitDSA(
+ D,
+ [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
+ Level, /*NotLastprivate=*/true)) &&
// If the variable is artificial and must be captured by value - try to
// capture by value.
!(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() &&
@@ -1509,6 +1523,42 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D) {
return VD;
}
}
+ // Capture variables captured by reference in lambdas for target-based
+ // directives.
+ if (VD && !DSAStack->isClauseParsingMode()) {
+ if (const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl()) {
+ bool SavedForceCaptureByReferenceInTargetExecutable =
+ DSAStack->isForceCaptureByReferenceInTargetExecutable();
+ DSAStack->setForceCaptureByReferenceInTargetExecutable(/*V=*/true);
+ if (RD->isLambda())
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() == LCK_ByRef) {
+ VarDecl *VD = LC.getCapturedVar();
+ DeclContext *VDC = VD->getDeclContext();
+ if (!VDC->Encloses(CurContext))
+ continue;
+ DSAStackTy::DSAVarData DVarPrivate =
+ DSAStack->getTopDSA(VD, /*FromParent=*/false);
+ // Do not capture already captured variables.
+ if (!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
+ DVarPrivate.CKind == OMPC_unknown &&
+ !DSAStack->checkMappableExprComponentListsForDecl(
+ D, /*CurrentRegionOnly=*/true,
+ [](OMPClauseMappableExprCommon::
+ MappableExprComponentListRef,
+ OpenMPClauseKind) { return true; }))
+ MarkVariableReferenced(LC.getLocation(), LC.getCapturedVar());
+ } else if (LC.getCaptureKind() == LCK_This) {
+ CheckCXXThisCapture(LC.getLocation());
+ }
+ }
+ DSAStack->setForceCaptureByReferenceInTargetExecutable(
+ SavedForceCaptureByReferenceInTargetExecutable);
+ }
+ }
if (DSAStack->getCurrentDirective() != OMPD_unknown &&
(!DSAStack->isClauseParsingMode() ||
diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
new file mode 100644
index 00000000000..cf40c8b566c
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
@@ -0,0 +1,132 @@
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4, i64 4]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 544, i64 33, i64 673, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 1407374883553936, i64 800]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4]
+// HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 547, i64 547, i64 547, i64 544, i64 547, i64 673, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592, i64 1688849860264592]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
+// HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953421968]
+// CHECK-DAG: [[S:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* }
+// CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* }
+
+// CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker()
+// CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63([[S]]* {{%.+}}, [[CAP1]]* dereferenceable(8) {{%.+}})
+// CLASS-NOT: getelementptr
+// CLASS: br i1 %
+// CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l63_worker()
+// CLASS: br label %
+// CLASS: br i1 %
+// CLASS: call void @__kmpc_kernel_init(
+// CLASS: call void @__kmpc_data_sharing_init_stack()
+// CLASS: call void @llvm.memcpy.
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
+// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
+// CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]],
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
+// CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* [[L]])
+// CLASS: ret void
+
+// CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l65([[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
+// CLASS-NOT: getelementptr
+// CLASS: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* %{{.+}})
+// CLASS: ret void
+
+// CLASS: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[S]]* %{{.+}}, [[CAP1]]* dereferenceable(8) %{{.+}})
+// CLASS-NOT: getelementptr
+// CLASS: call void @llvm.memcpy.
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
+// CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
+// CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]],
+// CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
+// CLASS: call i32 [[LAMBDA1]]([[CAP1]]* [[L]])
+// CLASS: ret void
+
+struct S {
+ int a = 15;
+ int foo() {
+ auto &&L = [&]() { return a; };
+#pragma omp target
+ L();
+#pragma omp target parallel
+ L();
+ return a;
+ }
+} s;
+
+// FUN: define internal void @__omp_offloading_{{.+}}_main_l125_worker()
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l125(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}}, i64 %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: br i1 %
+// FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l125_worker()
+// FUN: br label %
+// FUN: br i1 %
+// FUN: call void @__kmpc_kernel_init(
+// FUN: call void @__kmpc_data_sharing_init_stack()
+// FUN: call void @llvm.memcpy.
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
+// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
+// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
+// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
+// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
+// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
+// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
+// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
+// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
+// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
+// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
+// FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* [[L]])
+// FUN: ret void
+
+// FUN: define weak void @__omp_offloading_{{.+}}_main_l127(i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}} i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: call void [[PARALLEL:@.+]](i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, [[CAP2]]* %{{.+}})
+// FUN: ret void
+
+// FUN: define internal void [[PARALLEL:@.+]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* dereferenceable(4) %{{.+}}, [[CAP2]]* dereferenceable(40) %{{.+}})
+// FUN-NOT: getelementptr
+// FUN: call void @llvm.memcpy.
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
+// FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
+// FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
+// FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
+// FUN: store i32* %{{.+}}, i32** [[B_CAP]],
+// FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
+// FUN: store i32* %{{.+}}, i32** [[C_CAP]],
+// FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
+// FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
+// FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
+// FUN: store i32* %{{.+}}, i32** [[A_CAP]],
+// FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
+// FUN: call i64 [[LAMBDA2]]([[CAP2]]* [[L]])
+// FUN: ret void
+
+int main(int argc, char **argv) {
+ int &b = argc;
+ int &&c = 1;
+ int *d = &argc;
+ int a;
+ auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; };
+#pragma omp target firstprivate(argc) map(to : a)
+ L();
+#pragma omp target parallel
+ L();
+ return argc + s.foo();
+}
+
+#endif // HEADER
OpenPOWER on IntegriCloud