summaryrefslogtreecommitdiffstats
path: root/clang/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
authorYaxun Liu <Yaxun.Liu@amd.com>2019-10-09 23:54:10 +0000
committerYaxun Liu <Yaxun.Liu@amd.com>2019-10-09 23:54:10 +0000
commit229c78d3a5d107e1f7436e5afda7b7c80d1da394 (patch)
treec07d0bfb4e7783e90c8836ce22fab9c057470f13 /clang/lib/Sema/SemaCUDA.cpp
parent80b080723ff7f8a0097d76a322b241514a7f3864 (diff)
downloadbcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.tar.gz
bcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.zip
[CUDA][HIP] Fix host/device check with -fopenmp
CUDA/HIP program may be compiled with -fopenmp. In this case, -fopenmp is only passed to host compilation to take advantages of multi-threads computation. CUDA/HIP and OpenMP both use Sema::DeviceCallGraph to store functions to be analyzed and remove them once they decide the function is sure to be emitted. CUDA/HIP and OpenMP have different functions to determine if a function is sure to be emitted. To check host/device correctly for CUDA/HIP when -fopenmp is enabled, there needs a unified logic to determine whether a function is to be emitted. The logic needs to be aware of both CUDA and OpenMP logic. Differential Revision: https://reviews.llvm.org/D67837 llvm-svn: 374263
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp52
1 files changed, 12 insertions, 40 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 3c9c991c77f..d0ddfd040c9 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -600,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
-// Do we know that we will eventually codegen the given function?
-static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
- // Templates are emitted when they're instantiated.
- if (FD->isDependentContext())
- return false;
-
- // When compiling for device, host functions are never emitted. Similarly,
- // when compiling for host, device and global functions are never emitted.
- // (Technically, we do emit a host-side stub for global functions, but this
- // doesn't count for our purposes here.)
- Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
- if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
- return false;
- if (!S.getLangOpts().CUDAIsDevice &&
- (T == Sema::CFT_Device || T == Sema::CFT_Global))
- return false;
-
- // Check whether this function is externally visible -- if so, it's
- // known-emitted.
- //
- // We have to check the GVA linkage of the function's *definition* -- if we
- // only have a declaration, we don't know whether or not the function will be
- // emitted, because (say) the definition could include "inline".
- FunctionDecl *Def = FD->getDefinition();
-
- if (Def &&
- !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
- return true;
-
- // Otherwise, the function is known-emitted if it's in our set of
- // known-emitted functions.
- return S.DeviceKnownEmittedFns.count(FD) > 0;
-}
-
Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
unsigned DiagID) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
@@ -647,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
// device code if we're compiling for device. Defer any errors in device
// mode until the function is known-emitted.
if (getLangOpts().CUDAIsDevice) {
- return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ FunctionEmissionStatus::Emitted)
? DeviceDiagBuilder::K_ImmediateWithCallStack
: DeviceDiagBuilder::K_Deferred;
}
@@ -675,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
if (getLangOpts().CUDAIsDevice)
return DeviceDiagBuilder::K_Nop;
- return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+ return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
+ FunctionEmissionStatus::Emitted)
? DeviceDiagBuilder::K_ImmediateWithCallStack
: DeviceDiagBuilder::K_Deferred;
default:
@@ -702,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// If the caller is known-emitted, mark the callee as known-emitted.
// Otherwise, mark the call in our call graph so we can traverse it later.
- bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+ bool CallerKnownEmitted =
+ getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
if (CallerKnownEmitted) {
// Host-side references to a __global__ function refer to the stub, so the
// function itself is never emitted and therefore should not be marked.
- if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
- markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
+ if (!shouldIgnoreInHostDeviceCheck(Callee))
+ markKnownEmitted(
+ *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
+ return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
+ });
} else {
// If we have
// host fn calls kernel fn calls host+device,
@@ -715,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// omitting at the call to the kernel from the callgraph. This ensures
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
- if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+ if (!shouldIgnoreInHostDeviceCheck(Callee))
DeviceCallGraph[Caller].insert({Callee, Loc});
}
OpenPOWER on IntegriCloud