diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-10-09 23:54:10 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-10-09 23:54:10 +0000 |
commit | 229c78d3a5d107e1f7436e5afda7b7c80d1da394 (patch) | |
tree | c07d0bfb4e7783e90c8836ce22fab9c057470f13 /clang/lib/Sema/SemaCUDA.cpp | |
parent | 80b080723ff7f8a0097d76a322b241514a7f3864 (diff) | |
download | bcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.tar.gz bcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.zip |
[CUDA][HIP] Fix host/device check with -fopenmp
CUDA/HIP program may be compiled with -fopenmp. In this case, -fopenmp is only passed to host compilation
to take advantages of multi-threads computation.
CUDA/HIP and OpenMP both use Sema::DeviceCallGraph to store functions to be analyzed and remove them
once they decide the function is sure to be emitted. CUDA/HIP and OpenMP have different functions to determine
if a function is sure to be emitted.
To check host/device correctly for CUDA/HIP when -fopenmp is enabled, there needs a unified logic to determine
whether a function is to be emitted. The logic needs to be aware of both CUDA and OpenMP logic.
Differential Revision: https://reviews.llvm.org/D67837
llvm-svn: 374263
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r-- | clang/lib/Sema/SemaCUDA.cpp | 52 |
1 files changed, 12 insertions, 40 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 3c9c991c77f..d0ddfd040c9 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -600,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -// Do we know that we will eventually codegen the given function? -static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return false; - - // When compiling for device, host functions are never emitted. Similarly, - // when compiling for host, device and global functions are never emitted. - // (Technically, we do emit a host-side stub for global functions, but this - // doesn't count for our purposes here.) - Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); - if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) - return false; - if (!S.getLangOpts().CUDAIsDevice && - (T == Sema::CFT_Device || T == Sema::CFT_Global)) - return false; - - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will be - // emitted, because (say) the definition could include "inline". - FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; -} - Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); @@ -647,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, // device code if we're compiling for device. Defer any errors in device // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; } @@ -675,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, if (getLangOpts().CUDAIsDevice) return DeviceDiagBuilder::K_Nop; - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; default: @@ -702,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); + bool CallerKnownEmitted = + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; if (CallerKnownEmitted) { // Host-side references to a __global__ function refer to the stub, so the // function itself is never emitted and therefore should not be marked. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted); + if (!shouldIgnoreInHostDeviceCheck(Callee)) + markKnownEmitted( + *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) { + return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); } else { // If we have // host fn calls kernel fn calls host+device, @@ -715,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) DeviceCallGraph[Caller].insert({Callee, Loc}); } |