summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2016-02-12 18:29:18 +0000
committerArtem Belevich <tra@google.com>2016-02-12 18:29:18 +0000
commit186091094ae7c9d9afd438842057e892303686c8 (patch)
treea633fb26ce11cbf36e23798d870f5710bcf24d06 /clang/lib
parent996ad1fa0019d5ec1f5c3a9a306339a37470be0a (diff)
downloadbcm5719-llvm-186091094ae7c9d9afd438842057e892303686c8.tar.gz
bcm5719-llvm-186091094ae7c9d9afd438842057e892303686c8.zip
[CUDA] Tweak attribute-based overload resolution to match nvcc behavior.
This is an artefact of split-mode CUDA compilation that we need to mimic. HD functions are sometimes allowed to call H or D functions. Due to split compilation mode, device-side compilation will not see host-only functions, and thus they will not be considered at all. For clang, both H and D variants will become function overloads visible to the compiler. Normally the target attribute is considered only if C++ rules cannot determine which function is better. However, in this case we need to ignore functions that would not be present during the current compilation phase before we apply normal overload resolution rules. Changes: * introduced another level of call preference to better describe possible call combinations. * removed WrongSide functions from consideration if the set contains a SameSide function. * disabled H->D, D->H and G->H calls. These combinations are not allowed by CUDA and we were reluctantly allowing them to work around device-side calls to math functions in std namespace. We no longer need it after r258880. Differential Revision: http://reviews.llvm.org/D16870 llvm-svn: 260697
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp79
-rw-r--r--clang/lib/Sema/SemaOverload.cpp36
2 files changed, 72 insertions, 43 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 84fccd5ef59..4e59a0a0aaa 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -68,26 +68,26 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
-// Preferences: b-best, f-fallback, l-last resort, n-never.
+// Preferences: N:native, HD:host-device, SS:same side, WS:wrong side, --:never.
//
-// | F | T | Ph | Pd | H |
-// |----+----+----+----+-----+
-// | d | d | b | b | (b) |
-// | d | g | n | n | (a) |
-// | d | h | l | l | (e) |
-// | d | hd | f | f | (c) |
-// | g | d | b | b | (b) |
-// | g | g | n | n | (a) |
-// | g | h | l | l | (e) |
-// | g | hd | f | f | (c) |
-// | h | d | l | l | (e) |
-// | h | g | b | b | (b) |
-// | h | h | b | b | (b) |
-// | h | hd | f | f | (c) |
-// | hd | d | l | f | (d) |
-// | hd | g | f | n |(d/a)|
-// | hd | h | f | l | (d) |
-// | hd | hd | b | b | (b) |
+// | F | T | Ph | Pd | H |
+// |----+----+-----+-----+-----+
+// | d | d | N | N | (c) |
+// | d | g | -- | -- | (a) |
+// | d | h | -- | -- | (e) |
+// | d | hd | HD | HD | (b) |
+// | g | d | N | N | (c) |
+// | g | g | -- | -- | (a) |
+// | g | h | -- | -- | (e) |
+// | g | hd | HD | HD | (b) |
+// | h | d | -- | -- | (e) |
+// | h | g | N | N | (c) |
+// | h | h | N | N | (c) |
+// | h | hd | HD | HD | (b) |
+// | hd | d | WS | SS | (d) |
+// | hd | g | SS | -- |(d/a)|
+// | hd | h | SS | WS | (d) |
+// | hd | hd | HD | HD | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
@@ -112,39 +112,38 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
return CFP_Never;
- // (b) Best case scenarios
+ // (b) Calling HostDevice is OK for everyone.
+ if (CalleeTarget == CFT_HostDevice)
+ return CFP_HostDevice;
+
+ // (c) Best case scenarios
if (CalleeTarget == CallerTarget ||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
- return CFP_Best;
-
- // (c) Calling HostDevice is OK as a fallback that works for everyone.
- if (CalleeTarget == CFT_HostDevice)
- return CFP_Fallback;
-
- // Figure out what should be returned 'last resort' cases. Normally
- // those would not be allowed, but we'll consider them if
- // CUDADisableTargetCallChecks is true.
- CUDAFunctionPreference QuestionableResult =
- getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+ return CFP_Native;
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
- // Calling a function that matches compilation mode is OK.
- // Calling a function from the other side is frowned upon.
- if (getLangOpts().CUDAIsDevice)
- return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
- else
- return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
- ? CFP_Fallback
- : QuestionableResult;
+ // It's OK to call a compilation-mode matching function from an HD one.
+ if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
+ (!getLangOpts().CUDAIsDevice &&
+ (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
+ return CFP_SameSide;
+
+ // We'll allow calls to non-mode-matching functions if target call
+ // checks are disabled. This is needed to avoid complaining about
+ // HD->H calls when we compile for device side and vice versa.
+ if (getLangOpts().CUDADisableTargetCallChecks)
+ return CFP_WrongSide;
+
+ return CFP_Never;
}
// (e) Calling across device/host boundary is not something you should do.
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
- return QuestionableResult;
+ return CFP_Never;
llvm_unreachable("All cases should've been handled by now.");
}
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index d71b307d15b..f190872f0aa 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -8722,14 +8722,44 @@ OverloadingResult
OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
iterator &Best,
bool UserDefinedConversion) {
+ llvm::SmallVector<OverloadCandidate *, 16> Candidates;
+ std::transform(begin(), end(), std::back_inserter(Candidates),
+ [](OverloadCandidate &Cand) { return &Cand; });
+
+ // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA
+ // but accepted by both clang and NVCC. However during a particular
+ // compilation mode only one call variant is viable. We need to
+ // exclude non-viable overload candidates from consideration based
+ // only on their host/device attributes. Specifically, if one
+ // candidate call is WrongSide and the other is SameSide, we ignore
+ // the WrongSide candidate.
+ if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads) {
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ bool ContainsSameSideCandidate =
+ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide;
+ });
+ if (ContainsSameSideCandidate) {
+ auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_WrongSide;
+ };
+ Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
+ IsWrongSideCandidate),
+ Candidates.end());
+ }
+ }
+
// Find the best viable function.
Best = end();
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates)
if (Cand->Viable)
if (Best == end() || isBetterOverloadCandidate(S, *Cand, *Best, Loc,
UserDefinedConversion))
Best = Cand;
- }
// If we didn't find any viable functions, abort.
if (Best == end())
@@ -8739,7 +8769,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
// Make sure that this function is better than every other viable
// function. If not, we have an ambiguity.
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates) {
if (Cand->Viable &&
Cand != Best &&
!isBetterOverloadCandidate(S, *Best, *Cand, Loc,
OpenPOWER on IntegriCloud