4 files changed, 117 insertions, 11 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index d99f8e03ca8..5e6d0e3e53b 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -54,6 +54,45 @@ ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                        /*IsExecConfig=*/true);
 }
 
+/// Determine the CUDA target described by the parsed attribute list \p Attr
+/// (null means no attributes). When several target attributes are present
+/// the result is picked by precedence: invalid target, then global, then
+/// host+device, then device. A list naming none of them (or only the host
+/// attribute) yields CFT_Host.
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
+  bool SawHost = false;
+  bool SawDevice = false;
+  bool SawGlobal = false;
+  bool SawInvalidTarget = false;
+  for (const AttributeList *Cur = Attr; Cur; Cur = Cur->getNext()) {
+    switch (Cur->getKind()) {
+    case AttributeList::AT_CUDAHost:
+      SawHost = true;
+      break;
+    case AttributeList::AT_CUDADevice:
+      SawDevice = true;
+      break;
+    case AttributeList::AT_CUDAGlobal:
+      SawGlobal = true;
+      break;
+    case AttributeList::AT_CUDAInvalidTarget:
+      SawInvalidTarget = true;
+      break;
+    default:
+      // Attributes unrelated to CUDA targets are ignored.
+      break;
+    }
+  }
+
+  if (SawInvalidTarget)
+    return CFT_InvalidTarget;
+  if (SawGlobal)
+    return CFT_Global;
+  if (SawDevice)
+    return SawHost ? CFT_HostDevice : CFT_Device;
+  return CFT_Host;
+}
+
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
   // Code that lives outside a function is run on the host.
@@ -815,3 +854,32 @@ void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
     Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
   }
 }
+
+/// Diagnose (err_cuda_ovl_target) and mark \p NewFD invalid if a function in
+/// \p Previous has the same signature but a different CUDA target and either
+/// target is host+device or global: such functions "exist" on both host and
+/// device, so they must not be overloaded on target the way H and D may be.
+void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
+                                   LookupResult &Previous) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  auto IsHDOrGlobal = [](CUDAFunctionTarget T) {
+    return T == CFT_HostDevice || T == CFT_Global;
+  };
+  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
+  for (NamedDecl *PrevND : Previous) {
+    FunctionDecl *PrevFD = PrevND->getAsFunction();
+    if (!PrevFD)
+      continue;
+    CUDAFunctionTarget PrevTarget = IdentifyCUDATarget(PrevFD);
+    if (NewTarget == PrevTarget ||
+        !(IsHDOrGlobal(NewTarget) || IsHDOrGlobal(PrevTarget)) ||
+        IsOverload(NewFD, PrevFD, /* UseMemberUsingDeclRules = */ false,
+                   /* ConsiderCudaAttrs = */ false))
+      continue;
+    Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
+        << NewTarget << NewFD->getDeclName() << PrevTarget << PrevFD;
+    Diag(PrevFD->getLocation(), diag::note_previous_declaration);
+    NewFD->setInvalidDecl();
+    return;
+  }
+}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1650a11222c..be2466c9235 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9090,6 +9090,9 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
              diag::warn_cxx1z_compat_exception_spec_in_signature)
             << NewFD;
     }
+
+    if (!Redeclaration && LangOpts.CUDA)
+      checkCUDATargetOverload(NewFD, Previous);
   }
   return Redeclaration;
 }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 24c9ec6d507..6985d69d00b 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -580,6 +580,7 @@ clang::MakeDeductionFailureInfo(ASTContext &Context,
   case Sema::TDK_TooManyArguments:
   case Sema::TDK_TooFewArguments:
   case Sema::TDK_MiscellaneousDeductionFailure:
+  case Sema::TDK_CUDATargetMismatch:
     Result.Data = nullptr;
     break;
 
@@ -647,6 +648,7 @@ void DeductionFailureInfo::Destroy() {
   case Sema::TDK_TooFewArguments:
   case Sema::TDK_InvalidExplicitArguments:
   case Sema::TDK_FailedOverloadResolution:
+  case Sema::TDK_CUDATargetMismatch:
     break;
 
   case Sema::TDK_Inconsistent:
@@ -689,6 +691,7 @@ TemplateParameter DeductionFailureInfo::getTemplateParameter() {
   case Sema::TDK_DeducedMismatch:
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_FailedOverloadResolution:
+  case Sema::TDK_CUDATargetMismatch:
     return TemplateParameter();
 
   case Sema::TDK_Incomplete:
@@ -720,6 +723,7 @@ TemplateArgumentList *DeductionFailureInfo::getTemplateArgumentList() {
   case Sema::TDK_Underqualified:
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_FailedOverloadResolution:
+  case Sema::TDK_CUDATargetMismatch:
     return nullptr;
 
   case Sema::TDK_DeducedMismatch:
@@ -747,6 +751,7 @@ const TemplateArgument *DeductionFailureInfo::getFirstArg() {
   case Sema::TDK_InvalidExplicitArguments:
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_FailedOverloadResolution:
+  case Sema::TDK_CUDATargetMismatch:
     return nullptr;
 
   case Sema::TDK_Inconsistent:
@@ -774,6 +779,7 @@ const TemplateArgument *DeductionFailureInfo::getSecondArg() {
   case Sema::TDK_InvalidExplicitArguments:
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_FailedOverloadResolution:
+  case Sema::TDK_CUDATargetMismatch:
     return nullptr;
 
   case Sema::TDK_Inconsistent:
@@ -1139,20 +1145,11 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
 
     CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
                        OldTarget = IdentifyCUDATarget(Old);
-    if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
+    if (NewTarget == CFT_InvalidTarget)
       return false;
 
     assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");
 
-    // Don't allow HD and global functions to overload other functions with the
-    // same signature.  We allow overloading based on CUDA attributes so that
-    // functions can have different implementations on the host and device, but
-    // HD/global functions "exist" in some sense on both the host and device, so
-    // should have the same implementation on both sides.
-    if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
-        (NewTarget == CFT_Global) || (OldTarget == CFT_Global))
-      return false;
-
     // Allow overloading of functions with same signature and different CUDA
     // target attributes.
     return NewTarget != OldTarget;
@@ -9713,6 +9710,10 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
     S.Diag(Templated->getLocation(), diag::note_ovl_candidate_bad_deduction);
     MaybeEmitInheritedConstructorNote(S, Found);
     return;
+  case Sema::TDK_CUDATargetMismatch:
+    S.Diag(Templated->getLocation(),
+           diag::note_cuda_ovl_candidate_target_mismatch);
+    return;
   }
 }
 
@@ -9969,6 +9970,7 @@ static unsigned RankDeductionFailure(const DeductionFailureInfo &DFI) {
   case Sema::TDK_DeducedMismatch:
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_MiscellaneousDeductionFailure:
+  case Sema::TDK_CUDATargetMismatch:
     return 3;
 
   case Sema::TDK_InstantiationDepth:
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 16dd9ba44aa..898765cbd79 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7043,6 +7043,19 @@ bool Sema::CheckFunctionTemplateSpecialization(
         continue;
       }
 
+      // Target attributes are part of the function signature during CUDA
+      // compilation, so the deduced template must also have a matching CUDA
+      // target. Given that regular template deduction does not take
+      // target attributes into account, we perform the target match check
+      // here and reject candidates that have a different target.
+      if (LangOpts.CUDA &&
+          IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+        FailedCandidates.addCandidate().set(
+            I.getPair(), FunTmpl->getTemplatedDecl(),
+            MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+        continue;
+      }
+
       // Record this candidate.
       if (ExplicitTemplateArgs)
         ConvertedTemplateArgs[Specialization] = std::move(Args);
@@ -8103,6 +8116,7 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
   //  instantiated from the member definition associated with its class
   //  template.
   UnresolvedSet<8> Matches;
+  AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
   TemplateSpecCandidateSet FailedCandidates(D.getIdentifierLoc());
   for (LookupResult::iterator P = Previous.begin(), PEnd = Previous.end();
        P != PEnd; ++P) {
@@ -8140,6 +8154,26 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
       continue;
     }
 
+    // Target attributes are part of the function signature during CUDA
+    // compilation, so the deduced template must also have a matching CUDA
+    // target. Given that regular template deduction does not take it
+    // into account, we perform the target match check here and reject
+    // candidates that have a different target.
+    if (LangOpts.CUDA) {
+      CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
+      // We need to adjust the target when HD is forced by
+      // #pragma clang force_cuda_host_device
+      if (ForceCUDAHostDeviceDepth > 0 &&
+          (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
+        DeclaratorTarget = CFT_HostDevice;
+      if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
+        FailedCandidates.addCandidate().set(
+            P.getPair(), FunTmpl->getTemplatedDecl(),
+            MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+        continue;
+      }
+    }
+
     Matches.addDecl(Specialization, P.getAccess());
   }
 
@@ -8210,7 +8244,6 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
   }
 
   Specialization->setTemplateSpecializationKind(TSK, D.getIdentifierLoc());
-  AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
   if (Attr)
     ProcessDeclAttributeList(S, Specialization, Attr);