diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2018-03-19 17:04:07 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-03-19 17:04:07 +0000 |
| commit | b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f (patch) | |
| tree | e51275009642d5c2c18204ea9739df429980e877 /clang/lib | |
| parent | 5ccd87233fdac5119d50ebfba413c8f4bc484f16 (diff) | |
| download | bcm5719-llvm-b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f.tar.gz bcm5719-llvm-b7f3cba84cfb717e4a95f90c6d2a8ec4b20c1d3f.zip | |
[OPENMP, NVPTX] Emit correct thread id.
We emitted fake thread id for the outined function in NVPTX codegen.
Patch adds emission of the real thread id.
llvm-svn: 327867
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 8 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 51 |
2 files changed, 31 insertions, 28 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 410241b6189..f8a0772ebba 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -268,6 +268,10 @@ protected: void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, ArrayRef<llvm::Value *> Args = llvm::None) const; + /// \brief Emits address of the word in a memory where current thread id is + /// stored. + virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -564,10 +568,6 @@ private: /// \return Cache variable for the specified threadprivate. llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD); - /// \brief Emits address of the word in a memory where current thread id is - /// stored. - virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has /// linkage CommonLinkage by default and is initialized by null value. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 0eb01e7afe7..c2b846e83a1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn); + emitCall(CGF, WST.Loc, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); @@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Insert call to work function via shared wrapper. The shared // wrapper takes two arguments: // - the parallelism level; - // - the master thread ID; - emitOutlinedFunctionCall(CGF, WST.Loc, W, - {Bld.getInt16(/*ParallelLevel=*/0), - getMasterThreadID(CGF)}); + // - the thread ID; + emitCall(CGF, WST.Loc, W, + {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF, + auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { CGBuilderTy &Bld = CGF.Builder; @@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( auto *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; - auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + Address ZeroAddr = + CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( + /*DestWidth=*/32, /*Signed=*/1), + ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); }; @@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. SmallVector<llvm::Value *, 8> Args; - // TODO: suppport SIMD and pass actual values - Args.emplace_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - Args.emplace_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer()); + Args.emplace_back(ZeroAddr.getPointer()); CGBuilderTy &Bld = CGF.Builder; auto CI = CS.capture_begin(); |

