diff options
| author | Artem Belevich <tra@google.com> | 2016-02-12 18:29:18 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2016-02-12 18:29:18 +0000 |
| commit | 186091094ae7c9d9afd438842057e892303686c8 (patch) | |
| tree | a633fb26ce11cbf36e23798d870f5710bcf24d06 /clang/lib | |
| parent | 996ad1fa0019d5ec1f5c3a9a306339a37470be0a (diff) | |
| download | bcm5719-llvm-186091094ae7c9d9afd438842057e892303686c8.tar.gz bcm5719-llvm-186091094ae7c9d9afd438842057e892303686c8.zip | |
[CUDA] Tweak attribute-based overload resolution to match nvcc behavior.
This is an artefact of split-mode CUDA compilation that we need to
mimic. HD functions are sometimes allowed to call H or D functions. Due
to split compilation mode device-side compilation will not see host-only
function and thus they will not be considered at all. For clang both H
and D variants will become function overloads visible to
compiler. Normally target attribute is considered only if C++ rules can
not determine which function is better. However in this case we need to
ignore functions that would not be present during current compilation
phase before we apply normal overload resolution rules.
Changes:
* introduced another level of call preference to better describe
possible call combinations.
* removed WrongSide functions from consideration if the set contains
SameSide function.
* disabled H->D, D->H and G->H calls. These combinations are
not allowed by CUDA and we were reluctantly allowing them to work
around device-side calls to math functions in std namespace.
We no longer need it after r258880.
Differential Revision: http://reviews.llvm.org/D16870
llvm-svn: 260697
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Sema/SemaCUDA.cpp | 79 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaOverload.cpp | 36 |
2 files changed, 72 insertions, 43 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 84fccd5ef59..4e59a0a0aaa 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -68,26 +68,26 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { // Ph - preference in host mode // Pd - preference in device mode // H - handled in (x) -// Preferences: b-best, f-fallback, l-last resort, n-never. +// Preferences: N:native, HD:host-device, SS:same side, WS:wrong side, --:never. // -// | F | T | Ph | Pd | H | -// |----+----+----+----+-----+ -// | d | d | b | b | (b) | -// | d | g | n | n | (a) | -// | d | h | l | l | (e) | -// | d | hd | f | f | (c) | -// | g | d | b | b | (b) | -// | g | g | n | n | (a) | -// | g | h | l | l | (e) | -// | g | hd | f | f | (c) | -// | h | d | l | l | (e) | -// | h | g | b | b | (b) | -// | h | h | b | b | (b) | -// | h | hd | f | f | (c) | -// | hd | d | l | f | (d) | -// | hd | g | f | n |(d/a)| -// | hd | h | f | l | (d) | -// | hd | hd | b | b | (b) | +// | F | T | Ph | Pd | H | +// |----+----+-----+-----+-----+ +// | d | d | N | N | (c) | +// | d | g | -- | -- | (a) | +// | d | h | -- | -- | (e) | +// | d | hd | HD | HD | (b) | +// | g | d | N | N | (c) | +// | g | g | -- | -- | (a) | +// | g | h | -- | -- | (e) | +// | g | hd | HD | HD | (b) | +// | h | d | -- | -- | (e) | +// | h | g | N | N | (c) | +// | h | h | N | N | (c) | +// | h | hd | HD | HD | (b) | +// | hd | d | WS | SS | (d) | +// | hd | g | SS | -- |(d/a)| +// | hd | h | SS | WS | (d) | +// | hd | hd | HD | HD | (b) | Sema::CUDAFunctionPreference Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, @@ -112,39 +112,38 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice))) return CFP_Never; - // (b) Best case scenarios + // (b) Calling HostDevice is OK for everyone. + if (CalleeTarget == CFT_HostDevice) + return CFP_HostDevice; + + // (c) Best case scenarios if (CalleeTarget == CallerTarget || (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) - return CFP_Best; - - // (c) Calling HostDevice is OK as a fallback that works for everyone. - if (CalleeTarget == CFT_HostDevice) - return CFP_Fallback; - - // Figure out what should be returned 'last resort' cases. Normally - // those would not be allowed, but we'll consider them if - // CUDADisableTargetCallChecks is true. - CUDAFunctionPreference QuestionableResult = - getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never; + return CFP_Native; // (d) HostDevice behavior depends on compilation mode. if (CallerTarget == CFT_HostDevice) { - // Calling a function that matches compilation mode is OK. - // Calling a function from the other side is frowned upon. - if (getLangOpts().CUDAIsDevice) - return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult; - else - return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global) - ? CFP_Fallback - : QuestionableResult; + // It's OK to call a compilation-mode matching function from an HD one. + if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || + (!getLangOpts().CUDAIsDevice && + (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) + return CFP_SameSide; + + // We'll allow calls to non-mode-matching functions if target call + // checks are disabled. This is needed to avoid complaining about + // HD->H calls when we compile for device side and vice versa. + if (getLangOpts().CUDADisableTargetCallChecks) + return CFP_WrongSide; + + return CFP_Never; } // (e) Calling across device/host boundary is not something you should do. if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) - return QuestionableResult; + return CFP_Never; llvm_unreachable("All cases should've been handled by now."); } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index d71b307d15b..f190872f0aa 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -8722,14 +8722,44 @@ OverloadingResult OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, iterator &Best, bool UserDefinedConversion) { + llvm::SmallVector<OverloadCandidate *, 16> Candidates; + std::transform(begin(), end(), std::back_inserter(Candidates), + [](OverloadCandidate &Cand) { return &Cand; }); + + // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA + // but accepted by both clang and NVCC. However during a particular + // compilation mode only one call variant is viable. We need to + // exclude non-viable overload candidates from consideration based + // only on their host/device attributes. Specifically, if one + // candidate call is WrongSide and the other is SameSide, we ignore + // the WrongSide candidate. + if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads) { + const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext); + bool ContainsSameSideCandidate = + llvm::any_of(Candidates, [&](OverloadCandidate *Cand) { + return Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_SameSide; + }); + if (ContainsSameSideCandidate) { + auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) { + return Cand->Function && + S.IdentifyCUDAPreference(Caller, Cand->Function) == + Sema::CFP_WrongSide; + }; + Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(), + IsWrongSideCandidate), + Candidates.end()); + } + } + // Find the best viable function. Best = end(); - for (iterator Cand = begin(); Cand != end(); ++Cand) { + for (auto *Cand : Candidates) if (Cand->Viable) if (Best == end() || isBetterOverloadCandidate(S, *Cand, *Best, Loc, UserDefinedConversion)) Best = Cand; - } // If we didn't find any viable functions, abort. if (Best == end()) @@ -8739,7 +8769,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, // Make sure that this function is better than every other viable // function. If not, we have an ambiguity. - for (iterator Cand = begin(); Cand != end(); ++Cand) { + for (auto *Cand : Candidates) { if (Cand->Viable && Cand != Best && !isBetterOverloadCandidate(S, *Best, *Cand, Loc, |

