summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2017-07-17 13:30:36 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2017-07-17 13:30:36 +0000
commitbe5a8b42cd3172bf3fb5e6c1ed1f307f9465f926 (patch)
tree1396ac12c250a9d0ac7144a7efdf74729960a919 /clang/lib/CodeGen/CGOpenMPRuntime.cpp
parentdd2c29ef8374b358d4f1ff55904da9aed941cbd7 (diff)
downloadbcm5719-llvm-be5a8b42cd3172bf3fb5e6c1ed1f307f9465f926.tar.gz
bcm5719-llvm-be5a8b42cd3172bf3fb5e6c1ed1f307f9465f926.zip
[OPENMP] Codegen for reduction clauses in 'taskloop' directives.
Adds codegen for taskloop-based directives. llvm-svn: 308174
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp452
1 files changed, 439 insertions, 13 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 76f44670612..d488bd4b30b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -643,6 +643,12 @@ enum OpenMPRTLFunction {
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_wait,
+ // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ OMPRTL__kmpc_task_reduction_init,
+ // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ OMPRTL__kmpc_task_reduction_get_th_data,
//
// Offloading related calls
@@ -766,8 +772,8 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType Type, const Expr *Init,
+ const OMPDeclareReductionDecl *DRD,
Address SrcAddr = Address::invalid()) {
- auto *DRD = getReductionInit(Init);
// Perform element-by-element initialization.
QualType ElementTy;
@@ -869,19 +875,17 @@ LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
return LValue();
}
-void ReductionCodeGen::emitAggregateInitialization(CodeGenFunction &CGF,
- unsigned N,
- Address PrivateAddr,
- LValue SharedLVal) {
+void ReductionCodeGen::emitAggregateInitialization(
+ CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+ const OMPDeclareReductionDecl *DRD) {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
- SharedLVal.getAddress());
+ DRD, SharedLVal.getAddress());
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
@@ -890,6 +894,7 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
ClausesData.reserve(Shareds.size());
SharedAddresses.reserve(Shareds.size());
Sizes.reserve(Shareds.size());
+ BaseDecls.reserve(Shareds.size());
auto IPriv = Privates.begin();
auto IRed = ReductionOps.begin();
for (const auto *Ref : Shareds) {
@@ -912,20 +917,30 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
QualType PrivateType = PrivateVD->getType();
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
- Sizes.emplace_back(nullptr);
+ Sizes.emplace_back(
+ CGF.getTypeSize(
+ SharedAddresses[N].first.getType().getNonReferenceType()),
+ nullptr);
return;
}
llvm::Value *Size;
+ llvm::Value *SizeInChars;
+ llvm::Type *ElemType =
+ cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
+ ->getElementType();
+ auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
SharedAddresses[N].first.getPointer());
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
+ SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
- Size = CGF.getTypeSize(
+ SizeInChars = CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType());
+ Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
- Sizes.emplace_back(Size);
+ Sizes.emplace_back(SizeInChars, Size);
CodeGenFunction::OpaqueValueMapping OpaqueMap(
CGF,
cast<OpaqueValueExpr>(
@@ -941,7 +956,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
QualType PrivateType = PrivateVD->getType();
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
- assert(!Size && !Sizes[N] &&
+ assert(!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified redution "
"items.");
return;
@@ -971,7 +986,7 @@ void ReductionCodeGen::emitInitialization(
SharedType, SharedAddresses[N].first.getBaseInfo());
if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
CGF.getContext().getAsArrayType(PrivateVD->getType())) {
- emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal);
+ emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
PrivateAddr, SharedLVal.getAddress(),
@@ -1091,6 +1106,11 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
return PrivateAddr;
}
+bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
+ auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ return DRD && DRD->getInitializer();
+}
+
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
@@ -1948,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
}
+ case OMPRTL__kmpc_task_reduction_init: {
+ // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
+ break;
+ }
+ case OMPRTL__kmpc_task_reduction_get_th_data: {
+ // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
@@ -2298,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
+Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+ QualType VarType,
+ StringRef Name) {
+ llvm::Twine VarName(Name, ".artificial.");
+ llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
+ llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, SourceLocation()),
+ getThreadID(CGF, SourceLocation()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
+ CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
+ /*IsSigned=*/false),
+ getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
+ return Address(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ VarLVType->getPointerTo(/*AddrSpace=*/0)),
+ CGM.getPointerAlign());
+}
+
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
@@ -3093,6 +3154,8 @@ enum KmpTaskTFields {
KmpTaskTStride,
/// (Taskloops only) Is last iteration flag.
KmpTaskTLastIter,
+ /// (Taskloops only) Reduction data.
+ KmpTaskTReductions,
};
} // anonymous namespace
@@ -3644,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
// kmp_uint64 ub;
// kmp_int64 st;
// kmp_int32 liter;
+ // void * reductions;
// };
auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
UD->startDefinition();
@@ -3667,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
addFieldToRecordDecl(C, RD, KmpUInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ addFieldToRecordDecl(C, RD, C.VoidPtrTy);
}
RD->completeDefinition();
return RD;
@@ -3697,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
-/// tt->shareds);
+/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
@@ -3783,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
+ auto RLVal = CGF.EmitLValueForField(Base, *RFI);
+ auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
CallArgs.push_back(LBParam);
CallArgs.push_back(UBParam);
CallArgs.push_back(StParam);
CallArgs.push_back(LIParam);
+ CallArgs.push_back(RParam);
}
CallArgs.push_back(SharedsParam);
@@ -4549,6 +4618,16 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
+ // Store reductions address.
+ LValue RedLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
+ if (Data.Reductions)
+ CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
+ else {
+ CGF.EmitNullInitialization(RedLVal.getAddress(),
+ CGF.getContext().VoidPtrTy);
+ }
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
UpLoc,
@@ -5074,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
+/// Generates unique name for artificial threadprivate variables.
+/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
+static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
+ unsigned N) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+ return Out.str();
+}
+
+/// Emits reduction initializer function:
+/// \code
+/// void @.red_init(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// store <type> <init>, <type>* %0
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_init.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ LValue SharedLVal;
+ // If initializer uses initializer from declare reduction construct, emit a
+ // pointer to the address of the original reduction item (reuired by reduction
+ // initializer)
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr =
+ CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+ } else {
+ SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ CGM.getContext().VoidPtrTy);
+ }
+ // Emit the initializer:
+ // %0 = bitcast void* %arg to <type>*
+ // store <type> <init>, <type>* %0
+ RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+ [](CodeGenFunction &) { return false; });
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction combiner function:
+/// \code
+/// void @.red_comb(void* %arg0, void* %arg1) {
+/// %lhs = bitcast void* %arg0 to <type>*
+/// %rhs = bitcast void* %arg1 to <type>*
+/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
+/// store <type> %2, <type>* %lhs
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N,
+ const Expr *ReductionOp,
+ const Expr *LHS, const Expr *RHS,
+ const Expr *PrivateRef) {
+ auto &C = CGM.getContext();
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
+ auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
+ FunctionArgList Args;
+ ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&ParamInOut);
+ Args.emplace_back(&ParamIn);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_comb.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Remap lhs and rhs variables to the addresses of the function arguments.
+ // %lhs = bitcast void* %arg0 to <type>*
+ // %rhs = bitcast void* %arg1 to <type>*
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamInOut),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
+ });
+ PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamIn),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
+ });
+ PrivateScope.Privatize();
+ // Emit the combiner body:
+ // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
+ // store <type> %2, <type>* %lhs
+ CGM.getOpenMPRuntime().emitSingleReductionCombiner(
+ CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
+ cast<DeclRefExpr>(RHS));
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction finalizer function:
+/// \code
+/// void @.red_fini(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// <destroy>(<type>* %0)
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ if (!RCG.needCleanups(N))
+ return nullptr;
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_fini.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from global
+ // threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Emit the finalizer body:
+ // <destroy>(<type>* %0)
+ RCG.emitCleanups(CGF, N, PrivateAddr);
+ CGF.FinishFunction();
+ return Fn;
+}
+
+llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+ if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
+ return nullptr;
+
+ // Build typedef struct:
+ // kmp_task_red_input {
+ // void *reduce_shar; // shared reduction item
+ // size_t reduce_size; // size of data item
+ // void *reduce_init; // data initialization routine
+ // void *reduce_fini; // data finalization routine
+ // void *reduce_comb; // data combiner routine
+ // kmp_task_red_flags_t flags; // flags for additional info from compiler
+ // } kmp_task_red_input_t;
+ ASTContext &C = CGM.getContext();
+ auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+ RD->startDefinition();
+ const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
+ const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FlagsFD = addFieldToRecordDecl(
+ C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
+ RD->completeDefinition();
+ QualType RDType = C.getRecordType(RD);
+ unsigned Size = Data.ReductionVars.size();
+ llvm::APInt ArraySize(/*numBits=*/64, Size);
+ QualType ArrayRDType = C.getConstantArrayType(
+ RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ // kmp_task_red_input_t .rd_input.[Size];
+ Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
+ Data.ReductionOps);
+ for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
+ // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
+ llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
+ llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
+ llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
+ TaskRedInput.getPointer(), Idxs,
+ /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
+ ".rd_input.gep.");
+ LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
+ // ElemLVal.reduce_shar = &Shareds[Cnt];
+ LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
+ RCG.emitSharedLValue(CGF, Cnt);
+ llvm::Value *CastedShared =
+ CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+ CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+ RCG.emitAggregateType(CGF, Cnt);
+ llvm::Value *SizeValInChars;
+ llvm::Value *SizeVal;
+ std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
+ // We use delayed creation/initialization for VLAs, array sections and
+ // custom reduction initializations. It is required because runtime does not
+ // provide the way to pass the sizes of VLAs/array sections to
+ // initializer/combiner/finalizer functions and does not pass the pointer to
+ // original reduction item to the initializer. Instead threadprivate global
+ // variables are used to store these values and use them in the functions.
+ bool DelayedCreation = !!SizeVal;
+ SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
+ /*isSigned=*/false);
+ LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
+ CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
+ // ElemLVal.reduce_init = init;
+ LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
+ llvm::Value *InitAddr =
+ CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
+ CGF.EmitStoreOfScalar(InitAddr, InitLVal);
+ DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
+ // ElemLVal.reduce_fini = fini;
+ LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
+ llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
+ llvm::Value *FiniAddr = Fini
+ ? CGF.EmitCastToVoidPtr(Fini)
+ : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+ CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
+ // ElemLVal.reduce_comb = comb;
+ LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
+ llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
+ CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
+ RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
+ CGF.EmitStoreOfScalar(CombAddr, CombLVal);
+ // ElemLVal.flags = 0;
+ LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
+ if (DelayedCreation) {
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
+ FlagsLVal);
+ } else
+ CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+ }
+ // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
+ CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+}
+
+void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG,
+ unsigned N) {
+ auto Sizes = RCG.getSizes(N);
+ // Emit threadprivate global variable if the type is non-constant
+ // (Sizes.second = nullptr).
+ if (Sizes.second) {
+ llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
+ /*isSigned=*/false);
+ Address SizeAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
+ }
+ // Store address of the original reduction item if custom initializer is used.
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+ SharedAddr, /*IsVolatile=*/false);
+ }
+}
+
+Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal) {
+ // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ ReductionsPtr,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
+ CGM.VoidPtrTy)};
+ return Address(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
+ SharedLVal.getAlignment());
+}
+
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
OpenPOWER on IntegriCloud