summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author: Alexey Bataev <a.bataev@hotmail.com> 2018-03-19 17:04:07 +0000
committer: Alexey Bataev <a.bataev@hotmail.com> 2018-03-19 17:04:07 +0000
commit: b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f (patch)
tree: e51275009642d5c2c18204ea9739df429980e877 /clang/lib
parent: 5ccd87233fdac5119d50ebfba413c8f4bc484f16 (diff)
download: bcm5719-llvm-b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f.tar.gz
bcm5719-llvm-b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f.zip
[OPENMP, NVPTX] Emit correct thread id.
We emitted a fake thread id for the outlined function in NVPTX codegen. This patch adds emission of the real thread id. llvm-svn: 327867
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.h 8
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 51
2 files changed, 31 insertions, 28 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 410241b6189..f8a0772ebba 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -268,6 +268,10 @@ protected:
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
ArrayRef<llvm::Value *> Args = llvm::None) const;
+ /// \brief Emits address of the word in a memory where current thread id is
+ /// stored.
+ virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+
private:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
@@ -564,10 +568,6 @@ private:
/// \return Cache variable for the specified threadprivate.
llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
- /// \brief Emits address of the word in a memory where current thread id is
- /// stored.
- virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
-
/// \brief Gets (if variable with the given name already exist) or creates
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 0eb01e7afe7..c2b846e83a1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
CGF.EmitBlock(WorkerBB);
- emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
+ emitCall(CGF, WST.Loc, WST.WorkerFn);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
@@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Insert call to work function via shared wrapper. The shared
// wrapper takes two arguments:
// - the parallelism level;
- // - the master thread ID;
- emitOutlinedFunctionCall(CGF, WST.Loc, W,
- {Bld.getInt16(/*ParallelLevel=*/0),
- getMasterThreadID(CGF)});
+ // - the thread ID;
+ emitCall(CGF, WST.Loc, W,
+ {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
// Go to end of parallel region.
CGF.EmitBranch(TerminateBB);
@@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- Address ZeroAddr =
- CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
+ auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF,
PrePostActionTy &) {
CGBuilderTy &Bld = CGF.Builder;
@@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
auto *ThreadID = getThreadID(CGF, Loc);
llvm::Value *Args[] = {RTLoc, ThreadID};
- auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
- PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ Address ZeroAddr =
+ CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
};
@@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
- // TODO: suppport SIMD and pass actual values
- Args.emplace_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- Args.emplace_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
+ Args.emplace_back(ZeroAddr.getPointer());
CGBuilderTy &Bld = CGF.Builder;
auto CI = CS.capture_begin();
OpenPOWER on IntegriCloud