summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
authorJohn McCall <rjmccall@apple.com>2015-09-08 08:05:57 +0000
committerJohn McCall <rjmccall@apple.com>2015-09-08 08:05:57 +0000
commit7f416cc426384ad1f891addb61d93e7ca1ffa0f2 (patch)
treef30c1142c284b5507df7f2e9644cbacce21e4a8a /clang/lib/CodeGen/CGOpenMPRuntime.cpp
parentbb7483dd77bc48e3af2dd534d8ca65f6accd315f (diff)
downloadbcm5719-llvm-7f416cc426384ad1f891addb61d93e7ca1ffa0f2.tar.gz
bcm5719-llvm-7f416cc426384ad1f891addb61d93e7ca1ffa0f2.zip
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an alignment. Introduce APIs on CGBuilderTy to work with Address values. Change core APIs on CGF/CGM to traffic in Address where appropriate. Require alignments to be non-zero. Update a ton of code to compute and propagate alignment information. As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment helper function to CGF and made use of it in a number of places in the expression emitter. The end result is that we should now be significantly more correct when performing operations on objects that are locally known to be under-aligned. Since alignment is not reliably tracked in the type system, there are inherent limits to this, but at least we are no longer confused by standard operations like derived-to-base conversions and array-to-pointer decay. I've also fixed a large number of bugs where we were applying the complete-object alignment to a pointer instead of the non-virtual alignment, although most of these were hidden by the very conservative approach we took with member alignment. Also, because IRGen now reliably asserts on zero alignments, we should no longer be subject to an absurd but frustrating recurring bug where an incomplete type would report a zero alignment and then we'd naively do an alignmentAtOffset on it and emit code using an alignment equal to the largest power-of-two factor of the offset. We should also now be emitting much more aggressive alignment attributes in the presence of over-alignment. In particular, field access now uses alignmentAtOffset instead of min. Several times in this patch, I had to change the existing code-generation pattern in order to more effectively use the Address APIs. For the most part, this seems to be a strict improvement, like doing pointer arithmetic with GEPs instead of ptrtoint. That said, I've tried very hard to not change semantics, but it is likely that I've failed in a few places, for which I apologize. 
ABIArgInfo now always carries the assumed alignment of indirect and indirect byval arguments. In order to cut down on what was already a dauntingly large patch, I changed the code to never set align attributes in the IR on non-byval indirect arguments. That is, we still generate code which assumes that indirect arguments have the given alignment, but we don't express this information to the backend except where it's semantically required (i.e. on byvals). This is likely a minor regression for those targets that did provide this information, but it'll be trivial to add it back in a later patch. I partially punted on applying this work to CGBuiltin. Please do not add more uses of the CreateDefaultAligned{Load,Store} APIs; they will be going away eventually. llvm-svn: 246985
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp537
1 files changed, 290 insertions, 247 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 6ffcb715c95..a0b3ee5ab01 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -233,9 +233,8 @@ public:
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.MakeNaturalAlignAddrLValue(
- CGF.Builder.CreateAlignedLoad(
- CGF.GetAddrOfLocalVar(getThreadIDVariable()),
- CGF.PointerAlignInBytes),
+ CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(getThreadIDVariable())),
getThreadIDVariable()
->getType()
->castAs<PointerType>()
@@ -258,7 +257,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
- return CGF.MakeNaturalAlignAddrLValue(
+ return CGF.MakeAddrLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType());
}
@@ -280,6 +279,25 @@ void CGOpenMPRuntime::clear() {
InternalVars.clear();
}
+// Layout information for ident_t.
+static CharUnits getIdentAlign(CodeGenModule &CGM) {
+ return CGM.getPointerAlign();
+}
+static CharUnits getIdentSize(CodeGenModule &CGM) {
+ assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
+ return CharUnits::fromQuantity(16) + CGM.getPointerSize();
+}
+static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
+ // All the fields except the last are i32, so this works beautifully.
+ return unsigned(Field) * CharUnits::fromQuantity(4);
+}
+static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
+ CGOpenMPRuntime::IdentFieldIndex Field,
+ const llvm::Twine &Name = "") {
+ auto Offset = getOffsetOfIdentField(Field);
+ return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
+}
+
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -305,8 +323,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
return CGF.GenerateCapturedStmtFunction(*CS);
}
-llvm::Value *
-CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
+Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
+ CharUnits Align = getIdentAlign(CGM);
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
if (!Entry) {
if (!DefaultOpenMPPSource) {
@@ -315,7 +333,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
// Taken from
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
DefaultOpenMPPSource =
- CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
+ CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
DefaultOpenMPPSource =
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
@@ -323,6 +341,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
CGM.getModule(), IdentTy, /*isConstant*/ true,
llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
DefaultOpenMPLocation->setUnnamedAddr(true);
+ DefaultOpenMPLocation->setAlignment(Align.getQuantity());
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
llvm::Constant *Values[] = {Zero,
@@ -330,10 +349,9 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
Zero, Zero, DefaultOpenMPPSource};
llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
DefaultOpenMPLocation->setInitializer(Init);
- OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
- return DefaultOpenMPLocation;
+ OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
}
- return Entry;
+ return Address(Entry, Align);
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
@@ -342,34 +360,33 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
// If no debug info is generated - return global default location.
if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
Loc.isInvalid())
- return getOrCreateDefaultLocation(Flags);
+ return getOrCreateDefaultLocation(Flags).getPointer();
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- llvm::Value *LocValue = nullptr;
+ Address LocValue = Address::invalid();
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
- LocValue = I->second.DebugLoc;
+ LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
+
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
// GetOpenMPThreadID was called before this routine.
- if (LocValue == nullptr) {
+ if (!LocValue.isValid()) {
// Generate "ident_t .kmpc_loc.addr;"
- llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
- AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
+ Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
+ ".kmpc_loc.addr");
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
- Elem.second.DebugLoc = AI;
+ Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
- llvm::ConstantExpr::getSizeOf(IdentTy),
- CGM.PointerAlignInBytes);
+ CGM.getSize(getIdentSize(CGF.CGM)));
}
// char **psource = &.kmpc_loc_<flags>.addr.psource;
- auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
- IdentField_PSource);
+ Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
if (OMPDebugLoc == nullptr) {
@@ -389,7 +406,9 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
- return LocValue;
+ // Our callers always pass this to a runtime function, so for
+ // convenience, go ahead and return a naked pointer.
+ return LocValue.getPointer();
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
@@ -939,25 +958,27 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
Twine(CGM.getMangledName(VD)) + ".cache.");
}
-llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
- const VarDecl *VD,
- llvm::Value *VDAddr,
- SourceLocation Loc) {
+Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
+ const VarDecl *VD,
+ Address VDAddr,
+ SourceLocation Loc) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return VDAddr;
- auto VarTy = VDAddr->getType()->getPointerElementType();
+ auto VarTy = VDAddr.getElementType();
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
- CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
+ CGM.Int8PtrTy),
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
getOrCreateThreadPrivateCache(VD)};
- return CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
+ return Address(CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ VDAddr.getAlignment());
}
void CGOpenMPRuntime::emitThreadPrivateVarInit(
- CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
+ CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
// library.
@@ -967,14 +988,15 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
// to register constructor/destructor for variable.
llvm::Value *Args[] = {OMPLoc,
- CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
+ CGM.VoidPtrTy),
Ctor, CopyCtor, Dtor};
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
- const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
+ const VarDecl *VD, Address VDAddr, SourceLocation Loc,
bool PerformInit, CodeGenFunction *CGF) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
@@ -1005,17 +1027,15 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
Args, SourceLocation());
auto ArgVal = CtorCGF.EmitLoadOfScalar(
- CtorCGF.GetAddrOfLocalVar(&Dst),
- /*Volatile=*/false, CGM.PointerAlignInBytes,
+ CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
- auto Arg = CtorCGF.Builder.CreatePointerCast(
- ArgVal,
- CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
+ Address Arg = Address(ArgVal, VDAddr.getAlignment());
+ Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
+ CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
ArgVal = CtorCGF.EmitLoadOfScalar(
- CtorCGF.GetAddrOfLocalVar(&Dst),
- /*Volatile=*/false, CGM.PointerAlignInBytes,
+ CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
CtorCGF.FinishFunction();
@@ -1040,9 +1060,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
SourceLocation());
auto ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
- /*Volatile=*/false, CGM.PointerAlignInBytes,
- CGM.getContext().VoidPtrTy, Dst.getLocation());
- DtorCGF.emitDestroy(ArgVal, ASTTy,
+ /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
+ DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
@@ -1149,7 +1168,7 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
- llvm::Value *CapturedStruct,
+ Address CapturedStruct,
const Expr *IfCond) {
auto *RTLoc = emitUpdateLocation(CGF, Loc);
auto &&ThenGen =
@@ -1162,7 +1181,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1), // Number of arguments after 'microtask' argument
// (there is only one additional argument - 'context')
CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
- CGF.EmitCastToVoidPtr(CapturedStruct)};
+ CGF.EmitCastToVoidPtr(CapturedStruct.getPointer())};
auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, Args);
};
@@ -1177,11 +1196,15 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
// OutlinedFn(&GTid, &zero, CapturedStruct);
auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
- auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
- /*Signed*/ true);
- auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
+ Address ZeroAddr =
+ CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
- llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
+ llvm::Value *OutlinedFnArgs[] = {
+ ThreadIDAddr.getPointer(),
+ ZeroAddr.getPointer(),
+ CapturedStruct.getPointer()
+ };
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
@@ -1203,8 +1226,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
-llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
- SourceLocation Loc) {
+Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
+ SourceLocation Loc) {
if (auto OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable())
@@ -1215,7 +1238,7 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
CGF.EmitStoreOfScalar(ThreadID,
- CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
+ CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
return ThreadIDTemp;
}
@@ -1353,6 +1376,22 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
}
}
+/// Given an array of pointers to variables, project the address of a
+/// given variable.
+static Address emitAddrOfVarFromArray(CodeGenFunction &CGF,
+ Address Array, unsigned Index,
+ const VarDecl *Var) {
+ // Pull out the pointer to the variable.
+ Address PtrAddr =
+ CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
+ llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
+
+ Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
+ Addr = CGF.Builder.CreateElementBitCast(Addr,
+ CGF.ConvertTypeForMem(Var->getType()));
+ return Addr;
+}
+
static llvm::Value *emitCopyprivateCopyFunction(
CodeGenModule &CGM, llvm::Type *ArgsType,
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
@@ -1377,35 +1416,26 @@ static llvm::Value *emitCopyprivateCopyFunction(
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
// Dest = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
- auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
- CGF.PointerAlignInBytes),
- ArgsType);
- auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
- CGF.PointerAlignInBytes),
- ArgsType);
+ Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
+ ArgsType), CGF.getPointerAlign());
+ Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
+ ArgsType), CGF.getPointerAlign());
// *(Type0*)Dst[0] = *(Type0*)Src[0];
// *(Type1*)Dst[1] = *(Type1*)Src[1];
// ...
// *(Typen*)Dst[n] = *(Typen*)Src[n];
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
- auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(
- CGF.Builder.CreateStructGEP(nullptr, LHS, I),
- CGM.PointerAlignInBytes),
- CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
- auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(
- CGF.Builder.CreateStructGEP(nullptr, RHS, I),
- CGM.PointerAlignInBytes),
- CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+ auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
+ Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
+
+ auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
+ Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
+
auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
QualType Type = VD->getType();
- CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
- cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
- cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
- AssignmentOps[I]);
+ CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
}
CGF.FinishFunction();
return Fn;
@@ -1431,13 +1461,12 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
- llvm::AllocaInst *DidIt = nullptr;
+ Address DidIt = Address::invalid();
if (!CopyprivateVars.empty()) {
// int32 did_it = 0;
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
- CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
- DidIt->getAlignment());
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
}
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
@@ -1452,29 +1481,28 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
llvm::makeArrayRef(Args));
SingleOpGen(CGF);
- if (DidIt) {
+ if (DidIt.isValid()) {
// did_it = 1;
- CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
- DidIt->getAlignment());
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
}
});
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
- if (DidIt) {
+ if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
auto CopyprivateArrayTy =
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
- auto *CopyprivateList =
+ Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
- auto *Elem = CGF.Builder.CreateStructGEP(
- CopyprivateList->getAllocatedType(), CopyprivateList, I);
- CGF.Builder.CreateAlignedStore(
+ Address Elem = CGF.Builder.CreateConstArrayGEP(
+ CopyprivateList, I, CGF.getPointerSize());
+ CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
- Elem, CGM.PointerAlignInBytes);
+ CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
}
// Build function that copies private values from single region to all other
// threads in the corresponding parallel region.
@@ -1483,15 +1511,15 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
auto *BufSize = llvm::ConstantInt::get(
CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
- auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
- CGF.VoidPtrTy);
- auto *DidItVal =
- CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
+ Address CL =
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
+ CGF.VoidPtrTy);
+ auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
getThreadID(CGF, Loc), // i32 <gtid>
BufSize, // size_t <buf_size>
- CL, // void *<copyprivate list>
+ CL.getPointer(), // void *<copyprivate list>
CpyFn, // void (*) (void *, void *) <copy_func>
DidItVal // i32 did_it
};
@@ -1625,61 +1653,77 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
return Schedule != OMP_sch_static;
}
-void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPScheduleClauseKind ScheduleKind,
- unsigned IVSize, bool IVSigned, bool Ordered,
- llvm::Value *IL, llvm::Value *LB,
- llvm::Value *UB, llvm::Value *ST,
- llvm::Value *Chunk) {
+void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind,
+ unsigned IVSize, bool IVSigned,
+ bool Ordered, llvm::Value *UB,
+ llvm::Value *Chunk) {
OpenMPSchedType Schedule =
getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
- if (Ordered ||
- (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
- Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
- // Call __kmpc_dispatch_init(
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
- // kmp_int[32|64] lower, kmp_int[32|64] upper,
- // kmp_int[32|64] stride, kmp_int[32|64] chunk);
+ assert(Ordered ||
+ (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
+ Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
+ // Call __kmpc_dispatch_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
+ // kmp_int[32|64] lower, kmp_int[32|64] upper,
+ // kmp_int[32|64] stride, kmp_int[32|64] chunk);
+
+ // If the Chunk was not specified in the clause - use default value 1.
+ if (Chunk == nullptr)
+ Chunk = CGF.Builder.getIntN(IVSize, 1);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ getThreadID(CGF, Loc),
+ CGF.Builder.getInt32(Schedule), // Schedule type
+ CGF.Builder.getIntN(IVSize, 0), // Lower
+ UB, // Upper
+ CGF.Builder.getIntN(IVSize, 1), // Stride
+ Chunk // Chunk
+ };
+ CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
+}
+void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind,
+ unsigned IVSize, bool IVSigned,
+ bool Ordered, Address IL, Address LB,
+ Address UB, Address ST,
+ llvm::Value *Chunk) {
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+ assert(!Ordered);
+ assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
+
+ // Call __kmpc_for_static_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ if (Chunk == nullptr) {
+ assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
+ "expected static non-chunked schedule");
// If the Chunk was not specified in the clause - use default value 1.
- if (Chunk == nullptr)
Chunk = CGF.Builder.getIntN(IVSize, 1);
- llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc),
- CGF.Builder.getInt32(Schedule), // Schedule type
- CGF.Builder.getIntN(IVSize, 0), // Lower
- UB, // Upper
- CGF.Builder.getIntN(IVSize, 1), // Stride
- Chunk // Chunk
- };
- CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
} else {
- // Call __kmpc_for_static_init(
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
- // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
- // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
- // kmp_int[32|64] incr, kmp_int[32|64] chunk);
- if (Chunk == nullptr) {
- assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
- "expected static non-chunked schedule");
- // If the Chunk was not specified in the clause - use default value 1.
- Chunk = CGF.Builder.getIntN(IVSize, 1);
- } else
- assert((Schedule == OMP_sch_static_chunked ||
- Schedule == OMP_ord_static_chunked) &&
- "expected static chunked schedule");
- llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc),
- CGF.Builder.getInt32(Schedule), // Schedule type
- IL, // &isLastIter
- LB, // &LB
- UB, // &UB
- ST, // &Stride
- CGF.Builder.getIntN(IVSize, 1), // Incr
- Chunk // Chunk
- };
- CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
+ assert((Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_ord_static_chunked) &&
+ "expected static chunked schedule");
}
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ getThreadID(CGF, Loc),
+ CGF.Builder.getInt32(Schedule), // Schedule type
+ IL.getPointer(), // &isLastIter
+ LB.getPointer(), // &LB
+ UB.getPointer(), // &UB
+ ST.getPointer(), // &Stride
+ CGF.Builder.getIntN(IVSize, 1), // Incr
+ Chunk // Chunk
+ };
+ CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
@@ -1703,19 +1747,19 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
SourceLocation Loc, unsigned IVSize,
- bool IVSigned, llvm::Value *IL,
- llvm::Value *LB, llvm::Value *UB,
- llvm::Value *ST) {
+ bool IVSigned, Address IL,
+ Address LB, Address UB,
+ Address ST) {
// Call __kmpc_dispatch_next(
// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
// kmp_int[32|64] *p_stride);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
- IL, // &isLastIter
- LB, // &Lower
- UB, // &Upper
- ST // &Stride
+ IL.getPointer(), // &isLastIter
+ LB.getPointer(), // &Lower
+ UB.getPointer(), // &Upper
+ ST.getPointer() // &Stride
};
llvm::Value *Call =
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
@@ -1921,10 +1965,9 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
// tt->task_data.shareds);
auto *GtidParam = CGF.EmitLoadOfScalar(
- CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
- C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
- auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
- CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
+ CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
+ auto *TaskTypeArgAddr = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(&TaskTypeArg));
LValue TDBase =
CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
auto *KmpTaskTWithPrivatesQTyRD =
@@ -1947,7 +1990,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- PrivatesLVal.getAddress(), CGF.VoidPtrTy);
+ PrivatesLVal.getPointer(), CGF.VoidPtrTy);
} else {
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
@@ -1957,7 +2000,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
- CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
+ CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
CGF.FinishFunction();
return TaskEntry;
}
@@ -1988,8 +2031,8 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
Args);
- auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
- CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
+ auto *TaskTypeArgAddr = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(&TaskTypeArg));
LValue Base =
CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
auto *KmpTaskTWithPrivatesQTyRD =
@@ -2069,8 +2112,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
TaskPrivatesMapFnInfo, Args);
// *privi = &.privates.privi;
- auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
- CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
+ auto *TaskPrivatesArgAddr = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(&TaskPrivatesArg));
LValue Base =
CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
@@ -2078,11 +2121,10 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
for (auto *Field : PrivatesQTyRD->fields()) {
auto FieldLVal = CGF.EmitLValueForField(Base, Field);
auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
- auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
- VD->getType());
+ auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
CGF.EmitStoreOfScalar(
- FieldLVal.getAddress(),
+ FieldLVal.getPointer(),
CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
RefLVal.getType()->getPointeeType()));
++Counter;
@@ -2120,7 +2162,7 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1,
void CGOpenMPRuntime::emitTaskCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
+ llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
@@ -2227,12 +2269,12 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
// Fill the data in the resulting kmp_task_t record.
// Copy shareds if there are any.
- llvm::Value *KmpTaskSharedsPtr = nullptr;
+ Address KmpTaskSharedsPtr = Address::invalid();
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
- KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
+ KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar(
CGF.EmitLValueForField(
TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
- Loc);
+ Loc), CGF.getNaturalTypeAlignment(SharedsTy));
CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
}
// Emit initial values for private copies (if any).
@@ -2243,7 +2285,7 @@ void CGOpenMPRuntime::emitTaskCall(
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
LValue SharedsBase;
if (!FirstprivateVars.empty()) {
- SharedsBase = CGF.MakeNaturalAlignAddrLValue(
+ SharedsBase = CGF.MakeAddrLValue(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
SharedsTy);
@@ -2274,10 +2316,10 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitOMPAggregateAssign(
PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
Type, [&CGF, Elem, Init, &CapturesInfo](
- llvm::Value *DestElement, llvm::Value *SrcElement) {
+ Address DestElement, Address SrcElement) {
// Clean up any temporaries needed by the initialization.
CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
+ InitScope.addPrivate(Elem, [SrcElement]() -> Address {
return SrcElement;
});
(void)InitScope.Privatize();
@@ -2291,7 +2333,7 @@ void CGOpenMPRuntime::emitTaskCall(
}
} else {
CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
+ InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
return SharedRefLValue.getAddress();
});
(void)InitScope.Privatize();
@@ -2321,9 +2363,9 @@ void CGOpenMPRuntime::emitTaskCall(
Destructor);
// Process list of dependences.
- llvm::Value *DependInfo = nullptr;
- unsigned DependencesNumber = Dependences.size();
- if (!Dependences.empty()) {
+ Address DependenciesArray = Address::invalid();
+ unsigned NumDependencies = Dependences.size();
+ if (NumDependencies) {
// Dependence kind for RTL.
enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
@@ -2342,37 +2384,39 @@ void CGOpenMPRuntime::emitTaskCall(
} else {
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
}
+ CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
- KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
+ KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
- DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
- for (unsigned i = 0; i < DependencesNumber; ++i) {
- auto *E = Dependences[i].second;
- LValue Addr = CGF.EmitLValue(E);
+ DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
+ for (unsigned i = 0; i < NumDependencies; ++i) {
+ const Expr *E = Dependences[i].second;
+ auto Addr = CGF.EmitLValue(E);
llvm::Value *Size;
QualType Ty = E->getType();
- auto *DestAddr = Addr.getAddress();
if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
LValue UpAddrLVal =
CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
llvm::Value *UpAddr =
- CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getAddress(), /*Idx0=*/1);
+ CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
llvm::Value *LowIntPtr =
- CGF.Builder.CreatePtrToInt(DestAddr, CGM.SizeTy);
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
- } else
+ } else {
Size = getTypeSize(CGF, Ty);
- auto Base = CGF.MakeNaturalAlignAddrLValue(
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
+ }
+ auto Base = CGF.MakeAddrLValue(
+ CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
KmpDependInfoTy);
// deps[i].base_addr = &<Dependences[i].second>;
auto BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
- CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(DestAddr, CGF.IntPtrTy),
- BaseAddrLVal);
+ CGF.EmitStoreOfScalar(
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
+ BaseAddrLVal);
// deps[i].len = sizeof(<Dependences[i].second>);
auto LenLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
@@ -2397,8 +2441,8 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
FlagsLVal);
}
- DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
+ DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
CGF.VoidPtrTy);
}
@@ -2412,40 +2456,48 @@ void CGOpenMPRuntime::emitTaskCall(
// list is not empty
auto *ThreadID = getThreadID(CGF, Loc);
auto *UpLoc = emitUpdateLocation(CGF, Loc);
- llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
- llvm::Value *DepTaskArgs[] = {
- UpLoc,
- ThreadID,
- NewTask,
- DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
- DependInfo,
- DependInfo ? CGF.Builder.getInt32(0) : nullptr,
- DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
- auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
- &DepTaskArgs](CodeGenFunction &CGF) {
- // TODO: add check for untied tasks.
- CGF.EmitRuntimeCall(
- createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
- : OMPRTL__kmpc_omp_task),
- DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
+ llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
+ llvm::Value *DepTaskArgs[7];
+ if (NumDependencies) {
+ DepTaskArgs[0] = UpLoc;
+ DepTaskArgs[1] = ThreadID;
+ DepTaskArgs[2] = NewTask;
+ DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
+ DepTaskArgs[4] = DependenciesArray.getPointer();
+ DepTaskArgs[5] = CGF.Builder.getInt32(0);
+ DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ }
+ auto &&ThenCodeGen = [this, NumDependencies,
+ &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
+ // TODO: add check for untied tasks.
+ if (NumDependencies) {
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
+ DepTaskArgs);
+ } else {
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
+ TaskArgs);
+ }
};
typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
IfCallEndCleanup;
- llvm::Value *DepWaitTaskArgs[] = {
- UpLoc,
- ThreadID,
- DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
- DependInfo,
- DependInfo ? CGF.Builder.getInt32(0) : nullptr,
- DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
+
+ llvm::Value *DepWaitTaskArgs[6];
+ if (NumDependencies) {
+ DepWaitTaskArgs[0] = UpLoc;
+ DepWaitTaskArgs[1] = ThreadID;
+ DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
+ DepWaitTaskArgs[3] = DependenciesArray.getPointer();
+ DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
+ DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ }
auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
- DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
+ NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
// is specified.
- if (DependInfo)
+ if (NumDependencies)
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
DepWaitTaskArgs);
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
@@ -2463,6 +2515,7 @@ void CGOpenMPRuntime::emitTaskCall(
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
};
+
if (IfCond) {
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
} else {
@@ -2498,38 +2551,26 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
// Dst = (void*[n])(LHSArg);
// Src = (void*[n])(RHSArg);
- auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
- CGF.PointerAlignInBytes),
- ArgsType);
- auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
- CGF.PointerAlignInBytes),
- ArgsType);
+ Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
+ ArgsType), CGF.getPointerAlign());
+ Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
+ ArgsType), CGF.getPointerAlign());
// ...
// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
// ...
CodeGenFunction::OMPPrivateScope Scope(CGF);
for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
- Scope.addPrivate(
- cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
- [&]() -> llvm::Value *{
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
- CGM.PointerAlignInBytes),
- CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
- });
- Scope.addPrivate(
- cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
- [&]() -> llvm::Value *{
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.Builder.CreateAlignedLoad(
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
- CGM.PointerAlignInBytes),
- CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
- });
+ auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
+ Scope.addPrivate(RHSVar, [&]() -> Address {
+ return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar);
+ });
+ auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
+ Scope.addPrivate(LHSVar, [&]() -> Address {
+ return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar);
+ });
}
Scope.Privatize();
for (auto *E : ReductionOps) {
@@ -2596,14 +2637,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
QualType ReductionArrayTy =
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
- auto *ReductionList =
+ Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
- auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
- CGF.Builder.CreateAlignedStore(
+ Address Elem =
+ CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize());
+ CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
- Elem, CGM.PointerAlignInBytes);
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
}
// 2. Emit reduce_func().
@@ -2622,8 +2664,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
auto *ThreadId = getThreadID(CGF, Loc);
auto *ReductionArrayTySize = llvm::ConstantInt::get(
CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
- auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
- CGF.VoidPtrTy);
+ auto *RL =
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
+ CGF.VoidPtrTy);
llvm::Value *Args[] = {
IdentTLoc, // ident_t *<loc>
ThreadId, // i32 <gtid>
@@ -2736,11 +2779,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
[&CGF, UpExpr, VD](RValue XRValue) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(
- VD, [&CGF, VD, XRValue]() -> llvm::Value *{
- auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
+ VD, [&CGF, VD, XRValue]() -> Address {
+ Address LHSTemp = CGF.CreateMemTemp(VD->getType());
CGF.EmitStoreThroughLValue(
XRValue,
- CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
+ CGF.MakeAddrLValue(LHSTemp, VD->getType()));
return LHSTemp;
});
(void)PrivateScope.Privatize();
OpenPOWER on IntegriCloud