diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-10-09 23:54:10 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2019-10-09 23:54:10 +0000 |
commit | 229c78d3a5d107e1f7436e5afda7b7c80d1da394 (patch) | |
tree | c07d0bfb4e7783e90c8836ce22fab9c057470f13 /clang/lib/Sema | |
parent | 80b080723ff7f8a0097d76a322b241514a7f3864 (diff) | |
download | bcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.tar.gz bcm5719-llvm-229c78d3a5d107e1f7436e5afda7b7c80d1da394.zip |
[CUDA][HIP] Fix host/device check with -fopenmp
CUDA/HIP program may be compiled with -fopenmp. In this case, -fopenmp is only passed to host compilation
to take advantages of multi-threads computation.
CUDA/HIP and OpenMP both use Sema::DeviceCallGraph to store functions to be analyzed and remove them
once they decide the function is sure to be emitted. CUDA/HIP and OpenMP have different functions to determine
if a function is sure to be emitted.
To check host/device correctly for CUDA/HIP when -fopenmp is enabled, there needs a unified logic to determine
whether a function is to be emitted. The logic needs to be aware of both CUDA and OpenMP logic.
Differential Revision: https://reviews.llvm.org/D67837
llvm-svn: 374263
Diffstat (limited to 'clang/lib/Sema')
-rw-r--r-- | clang/lib/Sema/SemaCUDA.cpp | 52 | ||||
-rw-r--r-- | clang/lib/Sema/SemaDecl.cpp | 84 | ||||
-rw-r--r-- | clang/lib/Sema/SemaOpenMP.cpp | 146 |
3 files changed, 155 insertions, 127 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 3c9c991c77f..d0ddfd040c9 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -600,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -// Do we know that we will eventually codegen the given function? -static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return false; - - // When compiling for device, host functions are never emitted. Similarly, - // when compiling for host, device and global functions are never emitted. - // (Technically, we do emit a host-side stub for global functions, but this - // doesn't count for our purposes here.) - Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); - if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) - return false; - if (!S.getLangOpts().CUDAIsDevice && - (T == Sema::CFT_Device || T == Sema::CFT_Global)) - return false; - - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will be - // emitted, because (say) the definition could include "inline". - FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; -} - Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); @@ -647,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, // device code if we're compiling for device. Defer any errors in device // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; } @@ -675,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, if (getLangOpts().CUDAIsDevice) return DeviceDiagBuilder::K_Nop; - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; default: @@ -702,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); + bool CallerKnownEmitted = + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; if (CallerKnownEmitted) { // Host-side references to a __global__ function refer to the stub, so the // function itself is never emitted and therefore should not be marked. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted); + if (!shouldIgnoreInHostDeviceCheck(Callee)) + markKnownEmitted( + *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) { + return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); } else { // If we have // host fn calls kernel fn calls host+device, @@ -715,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) DeviceCallGraph[Caller].insert({Callee, Loc}); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4a027d86819..a5d95a38a99 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -17614,3 +17614,87 @@ void Sema::ActOnPragmaWeakAlias(IdentifierInfo* Name, Decl *Sema::getObjCDeclContext() const { return (dyn_cast_or_null<ObjCContainerDecl>(CurContext)); } + +Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { + // Templates are emitted when they're instantiated. + if (FD->isDependentContext()) + return FunctionEmissionStatus::TemplateDiscarded; + + FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown; + if (LangOpts.OpenMPIsDevice) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) + OMPES = FunctionEmissionStatus::OMPDiscarded; + else if (DeviceKnownEmittedFns.count(FD) > 0) + OMPES = FunctionEmissionStatus::Emitted; + } + } else if (LangOpts.OpenMP) { + // In OpenMP 4.5 all the functions are host functions. + if (LangOpts.OpenMP <= 45) { + OMPES = FunctionEmissionStatus::Emitted; + } else { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + // In OpenMP 5.0 or above, DevTy may be changed later by + // #pragma omp declare target to(*) device_type(*). Therefore DevTy + // having no value does not imply host. The emission status will be + // checked again at the end of compilation unit. + if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { + OMPES = FunctionEmissionStatus::OMPDiscarded; + } else if (DeviceKnownEmittedFns.count(FD) > 0) { + OMPES = FunctionEmissionStatus::Emitted; + } + } + } + } + if (OMPES == FunctionEmissionStatus::OMPDiscarded || + (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA)) + return OMPES; + + if (LangOpts.CUDA) { + // When compiling for device, host functions are never emitted. Similarly, + // when compiling for host, device and global functions are never emitted. + // (Technically, we do emit a host-side stub for global functions, but this + // doesn't count for our purposes here.) + Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD); + if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host) + return FunctionEmissionStatus::CUDADiscarded; + if (!LangOpts.CUDAIsDevice && + (T == Sema::CFT_Device || T == Sema::CFT_Global)) + return FunctionEmissionStatus::CUDADiscarded; + + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + if (Def && + !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def)) + && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted)) + return FunctionEmissionStatus::Emitted; + } + + // Otherwise, the function is known-emitted if it's in our set of + // known-emitted functions. + return (DeviceKnownEmittedFns.count(FD) > 0) + ? FunctionEmissionStatus::Emitted + : FunctionEmissionStatus::Unknown; +} + +bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) { + // Host-side references to a __global__ function refer to the stub, so the + // function itself is never emitted and therefore should not be marked. + // If we have host fn calls kernel fn calls host+device, the HD function + // does not get instantiated on the host. We model this by omitting at the + // call to the kernel from the callgraph. This ensures that, when compiling + // for host, only HD functions actually called from the host get marked as + // known-emitted. + return LangOpts.CUDA && !LangOpts.CUDAIsDevice && + IdentifyCUDATarget(Callee) == CFT_Global; +} diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5ceee810c6b..bc4af2d7add 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1565,34 +1565,11 @@ enum class FunctionEmissionStatus { }; } // anonymous namespace -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice && - "Expected OpenMP device compilation."); - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return FunctionEmissionStatus::Discarded; - - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - FunctionEmissionStatus FES = - isKnownDeviceEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1602,42 +1579,23 @@ Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, Kind = isOpenMPDeviceDelayedContext(*this) ? DeviceDiagBuilder::K_Deferred : DeviceDiagBuilder::K_Immediate; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: Kind = DeviceDiagBuilder::K_Nop; break; + case FunctionEmissionStatus::CUDADiscarded: + llvm_unreachable("CUDADiscarded unexpected in OpenMP device compilation"); + break; } return DeviceDiagBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this); } -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownHostEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && !S.LangOpts.OpenMPIsDevice && - "Expected OpenMP host compilation."); - // In OpenMP 4.5 all the functions are host functions. - if (S.LangOpts.OpenMP <= 45) - return FunctionEmissionStatus::Emitted; - - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice && "Expected OpenMP host compilation."); - FunctionEmissionStatus FES = - isKnownHostEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1646,7 +1604,9 @@ Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, case FunctionEmissionStatus::Unknown: Kind = DeviceDiagBuilder::K_Deferred; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: + case FunctionEmissionStatus::CUDADiscarded: Kind = DeviceDiagBuilder::K_Nop; break; } @@ -1663,31 +1623,34 @@ void Sema::checkOpenMPDeviceFunction(SourceLocation Loc, FunctionDecl *Callee, FunctionDecl *Caller = getCurFunctionDecl(); // host only function are not available on the device. - if (Caller && - (isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted || - (!isOpenMPDeviceDelayedContext(*this) && - isKnownDeviceEmitted(*this, Caller) == - FunctionEmissionStatus::Unknown)) && - isKnownDeviceEmitted(*this, Callee) == - FunctionEmissionStatus::Discarded) { - StringRef HostDevTy = - getOpenMPSimpleClauseTypeName(OMPC_device_type, OMPC_DEVICE_TYPE_host); - Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; - Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), - diag::note_omp_marked_device_type_here) - << HostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert(CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded && + "CUDADiscarded unexpected in OpenMP device function check"); + if ((CallerS == FunctionEmissionStatus::Emitted || + (!isOpenMPDeviceDelayedContext(*this) && + CallerS == FunctionEmissionStatus::Unknown)) && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef HostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_host); + Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; + Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), + diag::note_omp_marked_device_type_here) + << HostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. if ((CheckForDelayedContext && !isOpenMPDeviceDelayedContext(*this)) || (!Caller && !CheckForDelayedContext) || - (Caller && - isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) + (Caller && getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) markKnownEmitted(*this, Caller, Callee, Loc, [CheckForDelayedContext](Sema &S, FunctionDecl *FD) { return CheckForDelayedContext && - isKnownDeviceEmitted(S, FD) == + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; }); else if (Caller) @@ -1703,29 +1666,38 @@ void Sema::checkOpenMPHostFunction(SourceLocation Loc, FunctionDecl *Callee, FunctionDecl *Caller = getCurFunctionDecl(); // device only function are not available on the host. - if (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted && - isKnownHostEmitted(*this, Callee) == FunctionEmissionStatus::Discarded) { - StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( - OMPC_device_type, OMPC_DEVICE_TYPE_nohost); - Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; - Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), - diag::note_omp_marked_device_type_here) - << NoHostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert( + (LangOpts.CUDA || (CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded)) && + "CUDADiscarded unexpected in OpenMP host function check"); + if (CallerS == FunctionEmissionStatus::Emitted && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_nohost); + Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; + Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), + diag::note_omp_marked_device_type_here) + << NoHostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - if ((!CheckCaller && !Caller) || - (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) - markKnownEmitted( - *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { - return CheckCaller && - isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted; - }); - else if (Caller) - DeviceCallGraph[Caller].insert({Callee, Loc}); + if (!shouldIgnoreInHostDeviceCheck(Callee)) { + if ((!CheckCaller && !Caller) || + (Caller && + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) + markKnownEmitted( + *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { + return CheckCaller && + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); + else if (Caller) + DeviceCallGraph[Caller].insert({Callee, Loc}); + } } void Sema::checkOpenMPDeviceExpr(const Expr *E) { |