diff options
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 537 |
1 files changed, 290 insertions, 247 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 6ffcb715c95..a0b3ee5ab01 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -233,9 +233,8 @@ public: LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.MakeNaturalAlignAddrLValue( - CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - CGF.PointerAlignInBytes), + CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(getThreadIDVariable())), getThreadIDVariable() ->getType() ->castAs<PointerType>() @@ -258,7 +257,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { - return CGF.MakeNaturalAlignAddrLValue( + return CGF.MakeAddrLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType()); } @@ -280,6 +279,25 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +// Layout information for ident_t. +static CharUnits getIdentAlign(CodeGenModule &CGM) { + return CGM.getPointerAlign(); +} +static CharUnits getIdentSize(CodeGenModule &CGM) { + assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); + return CharUnits::fromQuantity(16) + CGM.getPointerSize(); +} +static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { + // All the fields except the last are i32, so this works beautifully. + return unsigned(Field) * CharUnits::fromQuantity(4); +} +static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, + CGOpenMPRuntime::IdentFieldIndex Field, + const llvm::Twine &Name = "") { + auto Offset = getOffsetOfIdentField(Field); + return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); +} + llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { @@ -305,8 +323,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( return CGF.GenerateCapturedStmtFunction(*CS); } -llvm::Value * -CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { +Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { + CharUnits Align = getIdentAlign(CGM); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { if (!DefaultOpenMPPSource) { @@ -315,7 +333,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { // Taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c DefaultOpenMPPSource = - CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); + CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } @@ -323,6 +341,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { CGM.getModule(), IdentTy, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); DefaultOpenMPLocation->setUnnamedAddr(true); + DefaultOpenMPLocation->setAlignment(Align.getQuantity()); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); llvm::Constant *Values[] = {Zero, @@ -330,10 +349,9 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { Zero, Zero, DefaultOpenMPPSource}; llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); DefaultOpenMPLocation->setInitializer(Init); - OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; - return DefaultOpenMPLocation; + OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; } - return Entry; + return Address(Entry, Align); } llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, @@ -342,34 +360,33 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, // If no debug info is generated - return global default location. if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || Loc.isInvalid()) - return getOrCreateDefaultLocation(Flags); + return getOrCreateDefaultLocation(Flags).getPointer(); assert(CGF.CurFn && "No function in current CodeGenFunction."); - llvm::Value *LocValue = nullptr; + Address LocValue = Address::invalid(); auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) - LocValue = I->second.DebugLoc; + LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); + // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if // GetOpenMPThreadID was called before this routine. - if (LocValue == nullptr) { + if (!LocValue.isValid()) { // Generate "ident_t .kmpc_loc.addr;" - llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); - AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); + Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), + ".kmpc_loc.addr"); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.DebugLoc = AI; + Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), - llvm::ConstantExpr::getSizeOf(IdentTy), - CGM.PointerAlignInBytes); + CGM.getSize(getIdentSize(CGF.CGM))); } // char **psource = &.kmpc_loc_<flags>.addr.psource; - auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0, - IdentField_PSource); + Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); if (OMPDebugLoc == nullptr) { @@ -389,7 +406,9 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, // *psource = ";<File>;<Function>;<Line>;<Column>;;"; CGF.Builder.CreateStore(OMPDebugLoc, PSource); - return LocValue; + // Our callers always pass this to a runtime function, so for + // convenience, go ahead and return a naked pointer. + return LocValue.getPointer(); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -939,25 +958,27 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { Twine(CGM.getMangledName(VD)) + ".cache."); } -llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, - const VarDecl *VD, - llvm::Value *VDAddr, - SourceLocation Loc) { +Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + Address VDAddr, + SourceLocation Loc) { if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return VDAddr; - auto VarTy = VDAddr->getType()->getPointerElementType(); + auto VarTy = VDAddr.getElementType(); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), + CGF.Builder.CreatePointerCast(VDAddr.getPointer(), + CGM.Int8PtrTy), CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), getOrCreateThreadPrivateCache(VD)}; - return CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); + return Address(CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + VDAddr.getAlignment()); } void CGOpenMPRuntime::emitThreadPrivateVarInit( - CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, + CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. @@ -967,14 +988,15 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. llvm::Value *Args[] = {OMPLoc, - CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), + CGF.Builder.CreatePointerCast(VDAddr.getPointer(), + CGM.VoidPtrTy), Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); } llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( - const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, + const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF) { if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) @@ -1005,17 +1027,15 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, Args, SourceLocation()); auto ArgVal = CtorCGF.EmitLoadOfScalar( - CtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, + CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); - auto Arg = CtorCGF.Builder.CreatePointerCast( - ArgVal, - CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); + Address Arg = Address(ArgVal, VDAddr.getAlignment()); + Arg = CtorCGF.Builder.CreateElementBitCast(Arg, + CtorCGF.ConvertTypeForMem(ASTTy)); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( - CtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, + CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); CtorCGF.FinishFunction(); @@ -1040,9 +1060,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( SourceLocation()); auto ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, - CGM.getContext().VoidPtrTy, Dst.getLocation()); - DtorCGF.emitDestroy(ArgVal, ASTTy, + /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); + DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); DtorCGF.FinishFunction(); @@ -1149,7 +1168,7 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, - llvm::Value *CapturedStruct, + Address CapturedStruct, const Expr *IfCond) { auto *RTLoc = emitUpdateLocation(CGF, Loc); auto &&ThenGen = @@ -1162,7 +1181,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1), // Number of arguments after 'microtask' argument // (there is only one additional argument - 'context') CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), - CGF.EmitCastToVoidPtr(CapturedStruct)}; + CGF.EmitCastToVoidPtr(CapturedStruct.getPointer())}; auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, Args); }; @@ -1177,11 +1196,15 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, // OutlinedFn(>id, &zero, CapturedStruct); auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); - auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, - /*Signed*/ true); - auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); + Address ZeroAddr = + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); - llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; + llvm::Value *OutlinedFnArgs[] = { + ThreadIDAddr.getPointer(), + ZeroAddr.getPointer(), + CapturedStruct.getPointer() + }; CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); @@ -1203,8 +1226,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, // regular serial code region, get thread ID by calling kmp_int32 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and // return the address of that temp. -llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, - SourceLocation Loc) { +Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, + SourceLocation Loc) { if (auto OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) @@ -1215,7 +1238,7 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); CGF.EmitStoreOfScalar(ThreadID, - CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); + CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); return ThreadIDTemp; } @@ -1353,6 +1376,22 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, } } +/// Given an array of pointers to variables, project the address of a +/// given variable. +static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, + Address Array, unsigned Index, + const VarDecl *Var) { + // Pull out the pointer to the variable. + Address PtrAddr = + CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); + llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); + + Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); + Addr = CGF.Builder.CreateElementBitCast(Addr, + CGF.ConvertTypeForMem(Var->getType())); + return Addr; +} + static llvm::Value *emitCopyprivateCopyFunction( CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, @@ -1377,35 +1416,26 @@ static llvm::Value *emitCopyprivateCopyFunction( CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); - auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), - CGF.PointerAlignInBytes), - ArgsType); - auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), - CGF.PointerAlignInBytes), - ArgsType); + Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), + ArgsType), CGF.getPointerAlign()); + Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), + ArgsType), CGF.getPointerAlign()); // *(Type0*)Dst[0] = *(Type0*)Src[0]; // *(Type1*)Dst[1] = *(Type1*)Src[1]; // ... // *(Typen*)Dst[n] = *(Typen*)Src[n]; for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { - auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(nullptr, LHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); - auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(nullptr, RHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); + auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); + Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); + + auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); + Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); + auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); QualType Type = VD->getType(); - CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr, - cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()), - cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), - AssignmentOps[I]); + CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); } CGF.FinishFunction(); return Fn; @@ -1431,13 +1461,12 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); - llvm::AllocaInst *DidIt = nullptr; + Address DidIt = Address::invalid(); if (!CopyprivateVars.empty()) { // int32 did_it = 0; auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); - CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt, - DidIt->getAlignment()); + CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; @@ -1452,29 +1481,28 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), llvm::makeArrayRef(Args)); SingleOpGen(CGF); - if (DidIt) { + if (DidIt.isValid()) { // did_it = 1; - CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, - DidIt->getAlignment()); + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); } }); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); - if (DidIt) { + if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); auto CopyprivateArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. - auto *CopyprivateList = + Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { - auto *Elem = CGF.Builder.CreateStructGEP( - CopyprivateList->getAllocatedType(), CopyprivateList, I); - CGF.Builder.CreateAlignedStore( + Address Elem = CGF.Builder.CreateConstArrayGEP( + CopyprivateList, I, CGF.getPointerSize()); + CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), - Elem, CGM.PointerAlignInBytes); + CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), + Elem); } // Build function that copies private values from single region to all other // threads in the corresponding parallel region. @@ -1483,15 +1511,15 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); auto *BufSize = llvm::ConstantInt::get( CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); - auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, - CGF.VoidPtrTy); - auto *DidItVal = - CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); + Address CL = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, + CGF.VoidPtrTy); + auto *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t *<loc> getThreadID(CGF, Loc), // i32 <gtid> BufSize, // size_t <buf_size> - CL, // void *<copyprivate list> + CL.getPointer(), // void *<copyprivate list> CpyFn, // void (*) (void *, void *) <copy_func> DidItVal // i32 did_it }; @@ -1625,61 +1653,77 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } -void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, - unsigned IVSize, bool IVSigned, bool Ordered, - llvm::Value *IL, llvm::Value *LB, - llvm::Value *UB, llvm::Value *ST, - llvm::Value *Chunk) { +void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind, + unsigned IVSize, bool IVSigned, + bool Ordered, llvm::Value *UB, + llvm::Value *Chunk) { OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); - if (Ordered || - (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && - Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) { - // Call __kmpc_dispatch_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, - // kmp_int[32|64] lower, kmp_int[32|64] upper, - // kmp_int[32|64] stride, kmp_int[32|64] chunk); + assert(Ordered || + (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && + Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); + // Call __kmpc_dispatch_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, + // kmp_int[32|64] lower, kmp_int[32|64] upper, + // kmp_int[32|64] stride, kmp_int[32|64] chunk); + + // If the Chunk was not specified in the clause - use default value 1. + if (Chunk == nullptr) + Chunk = CGF.Builder.getIntN(IVSize, 1); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + getThreadID(CGF, Loc), + CGF.Builder.getInt32(Schedule), // Schedule type + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk + }; + CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); +} +void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind, + unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, + Address UB, Address ST, + llvm::Value *Chunk) { + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); + assert(!Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && + "expected static non-chunked schedule"); // If the Chunk was not specified in the clause - use default value 1. - if (Chunk == nullptr) Chunk = CGF.Builder.getIntN(IVSize, 1); - llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk - }; - CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } else { - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. - Chunk = CGF.Builder.getIntN(IVSize, 1); - } else - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static_chunked) && - "expected static chunked schedule"); - llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - IL, // &isLastIter - LB, // &LB - UB, // &UB - ST, // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_ord_static_chunked) && + "expected static chunked schedule"); } + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + getThreadID(CGF, Loc), + CGF.Builder.getInt32(Schedule), // Schedule type + IL.getPointer(), // &isLastIter + LB.getPointer(), // &LB + UB.getPointer(), // &UB + ST.getPointer(), // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, @@ -1703,19 +1747,19 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, - bool IVSigned, llvm::Value *IL, - llvm::Value *LB, llvm::Value *UB, - llvm::Value *ST) { + bool IVSigned, Address IL, + Address LB, Address UB, + Address ST) { // Call __kmpc_dispatch_next( // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), - IL, // &isLastIter - LB, // &Lower - UB, // &Upper - ST // &Stride + IL.getPointer(), // &isLastIter + LB.getPointer(), // &Lower + UB.getPointer(), // &Upper + ST.getPointer() // &Stride }; llvm::Value *Call = CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); @@ -1921,10 +1965,9 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( - CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, - C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); - auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); + CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); + auto *TaskTypeArgAddr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(&TaskTypeArg)); LValue TDBase = CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesQTyRD = @@ -1947,7 +1990,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivatesLVal.getAddress(), CGF.VoidPtrTy); + PrivatesLVal.getPointer(), CGF.VoidPtrTy); } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -1957,7 +2000,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), - CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); + CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); CGF.FinishFunction(); return TaskEntry; } @@ -1988,8 +2031,8 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, Args); - auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes); + auto *TaskTypeArgAddr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(&TaskTypeArg)); LValue Base = CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesQTyRD = @@ -2069,8 +2112,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMapFnInfo, Args); // *privi = &.privates.privi; - auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad( - CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes); + auto *TaskPrivatesArgAddr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(&TaskPrivatesArg)); LValue Base = CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy); auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); @@ -2078,11 +2121,10 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, for (auto *Field : PrivatesQTyRD->fields()) { auto FieldLVal = CGF.EmitLValueForField(Base, Field); auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; - auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD), - VD->getType()); + auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc); CGF.EmitStoreOfScalar( - FieldLVal.getAddress(), + FieldLVal.getPointer(), CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(), RefLVal.getType()->getPointeeType())); ++Counter; @@ -2120,7 +2162,7 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, - llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds, + llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies, ArrayRef<const Expr *> FirstprivateVars, @@ -2227,12 +2269,12 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); // Fill the data in the resulting kmp_task_t record. // Copy shareds if there are any. - llvm::Value *KmpTaskSharedsPtr = nullptr; + Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { - KmpTaskSharedsPtr = CGF.EmitLoadOfScalar( + KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar( CGF.EmitLValueForField( TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), - Loc); + Loc), CGF.getNaturalTypeAlignment(SharedsTy)); CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). @@ -2243,7 +2285,7 @@ void CGOpenMPRuntime::emitTaskCall( FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); LValue SharedsBase; if (!FirstprivateVars.empty()) { - SharedsBase = CGF.MakeNaturalAlignAddrLValue( + SharedsBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); @@ -2274,10 +2316,10 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitOMPAggregateAssign( PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo]( - llvm::Value *DestElement, llvm::Value *SrcElement) { + Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{ + InitScope.addPrivate(Elem, [SrcElement]() -> Address { return SrcElement; }); (void)InitScope.Privatize(); @@ -2291,7 +2333,7 @@ void CGOpenMPRuntime::emitTaskCall( } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{ + InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { return SharedRefLValue.getAddress(); }); (void)InitScope.Privatize(); @@ -2321,9 +2363,9 @@ void CGOpenMPRuntime::emitTaskCall( Destructor); // Process list of dependences. - llvm::Value *DependInfo = nullptr; - unsigned DependencesNumber = Dependences.size(); - if (!Dependences.empty()) { + Address DependenciesArray = Address::invalid(); + unsigned NumDependencies = Dependences.size(); + if (NumDependencies) { // Dependence kind for RTL. enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; @@ -2342,37 +2384,39 @@ void CGOpenMPRuntime::emitTaskCall( } else { KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); } + CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( - KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()), + KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; - DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy); - for (unsigned i = 0; i < DependencesNumber; ++i) { - auto *E = Dependences[i].second; - LValue Addr = CGF.EmitLValue(E); + DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); + for (unsigned i = 0; i < NumDependencies; ++i) { + const Expr *E = Dependences[i].second; + auto Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); - auto *DestAddr = Addr.getAddress(); if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getAddress(), /*Idx0=*/1); + CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(DestAddr, CGM.SizeTy); + CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else + } else { Size = getTypeSize(CGF, Ty); - auto Base = CGF.MakeNaturalAlignAddrLValue( - CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i), + } + auto Base = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); // deps[i].base_addr = &<Dependences[i].second>; auto BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(DestAddr, CGF.IntPtrTy), - BaseAddrLVal); + CGF.EmitStoreOfScalar( + CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), + BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); auto LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); @@ -2397,8 +2441,8 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), FlagsLVal); } - DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0), + DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), CGF.VoidPtrTy); } @@ -2412,40 +2456,48 @@ void CGOpenMPRuntime::emitTaskCall( // list is not empty auto *ThreadID = getThreadID(CGF, Loc); auto *UpLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask}; - llvm::Value *DepTaskArgs[] = { - UpLoc, - ThreadID, - NewTask, - DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, - DependInfo, - DependInfo ? CGF.Builder.getInt32(0) : nullptr, - DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; - auto &&ThenCodeGen = [this, DependInfo, &TaskArgs, - &DepTaskArgs](CodeGenFunction &CGF) { - // TODO: add check for untied tasks. - CGF.EmitRuntimeCall( - createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps - : OMPRTL__kmpc_omp_task), - DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs)); + llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; + llvm::Value *DepTaskArgs[7]; + if (NumDependencies) { + DepTaskArgs[0] = UpLoc; + DepTaskArgs[1] = ThreadID; + DepTaskArgs[2] = NewTask; + DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); + DepTaskArgs[4] = DependenciesArray.getPointer(); + DepTaskArgs[5] = CGF.Builder.getInt32(0); + DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } + auto &&ThenCodeGen = [this, NumDependencies, + &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { + // TODO: add check for untied tasks. + if (NumDependencies) { + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), + DepTaskArgs); + } else { + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), + TaskArgs); + } }; typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> IfCallEndCleanup; - llvm::Value *DepWaitTaskArgs[] = { - UpLoc, - ThreadID, - DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr, - DependInfo, - DependInfo ? CGF.Builder.getInt32(0) : nullptr, - DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr}; + + llvm::Value *DepWaitTaskArgs[6]; + if (NumDependencies) { + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); + DepWaitTaskArgs[3] = DependenciesArray.getPointer(); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) { + NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. - if (DependInfo) + if (NumDependencies) CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, @@ -2463,6 +2515,7 @@ void CGOpenMPRuntime::emitTaskCall( llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); }; + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); } else { @@ -2498,38 +2551,26 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); - auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), - CGF.PointerAlignInBytes), - ArgsType); - auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), - CGF.PointerAlignInBytes), - ArgsType); + Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), + ArgsType), CGF.getPointerAlign()); + Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), + ArgsType), CGF.getPointerAlign()); // ... // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); // ... CodeGenFunction::OMPPrivateScope Scope(CGF); for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { - Scope.addPrivate( - cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()), - [&]() -> llvm::Value *{ - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType()))); - }); - Scope.addPrivate( - cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()), - [&]() -> llvm::Value *{ - return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateAlignedLoad( - CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I), - CGM.PointerAlignInBytes), - CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType()))); - }); + auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); + Scope.addPrivate(RHSVar, [&]() -> Address { + return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar); + }); + auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); + Scope.addPrivate(LHSVar, [&]() -> Address { + return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar); + }); } Scope.Privatize(); for (auto *E : ReductionOps) { @@ -2596,14 +2637,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, QualType ReductionArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); - auto *ReductionList = + Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { - auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I); - CGF.Builder.CreateAlignedStore( + Address Elem = + CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize()); + CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy), - Elem, CGM.PointerAlignInBytes); + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); } // 2. Emit reduce_func(). @@ -2622,8 +2664,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto *ThreadId = getThreadID(CGF, Loc); auto *ReductionArrayTySize = llvm::ConstantInt::get( CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); - auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, - CGF.VoidPtrTy); + auto *RL = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), + CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t *<loc> ThreadId, // i32 <gtid> @@ -2736,11 +2779,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, [&CGF, UpExpr, VD](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue]() -> llvm::Value *{ - auto *LHSTemp = CGF.CreateMemTemp(VD->getType()); + VD, [&CGF, VD, XRValue]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.EmitStoreThroughLValue( XRValue, - CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType())); + CGF.MakeAddrLValue(LHSTemp, VD->getType())); return LHSTemp; }); (void)PrivateScope.Privatize(); |