author     Artem Belevich <tra@google.com>    2016-12-08 19:38:13 +0000
committer  Artem Belevich <tra@google.com>    2016-12-08 19:38:13 +0000
commit     64135c35f771c4d0fb9f01e05d167df0d0445710 (patch)
tree       3332e28bd1b82f1dd9b322f35827e888d4f64010 /clang/lib
parent     6eead19ce488d8d4bdb227bade182f8c5667327a (diff)
[CUDA] Ignore implicit target attributes during function template instantiation.
Some functions and templates are treated as __host__ __device__ even when they don't have explicitly specified target attributes. What's worse, this treatment may change depending on command line options (-fno-cuda-host-device-constexpr) or #pragma clang force_cuda_host_device.

Combined with the strict checking for matching function targets introduced in D25809 (r288962), this makes it hard to write code that explicitly instantiates or specializes some functions regardless of the pragmas or command line options in effect.

This patch changes the way we match the target attributes of the base template against the attributes used in an explicit instantiation or specialization, so that only explicitly specified attributes are considered. This makes base template selection behave consistently regardless of pragmas or command line options that may affect the CUDA target.

Differential Revision: https://reviews.llvm.org/D25845

llvm-svn: 289091
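As an editorial illustration (not part of the commit or its test changes), the kind of source this change is meant to accept looks roughly like the hypothetical CUDA snippet below; the name twice and the surrounding setup are invented for the example.

// twice.cu -- hypothetical example. Under the default options a constexpr
// function template is implicitly treated as __host__ __device__, and that
// treatment changes with -fno-cuda-host-device-constexpr or
// #pragma clang force_cuda_host_device. With this patch, the explicit
// specialization below is matched against its primary template using only
// explicitly written target attributes, so it resolves to the same base
// template no matter which of those options are in effect.

template <typename T>
constexpr T twice(T x) {          // may be implicitly __host__ __device__
  return x + x;
}

// No explicit target attributes here either; base-template selection now
// ignores any implicit HD attributes on the primary template.
template <>
constexpr int twice<int>(int x) {
  return 2 * x;
}

int main() { return twice(1) == 2 ? 0 : 1; }   // host-side use only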
Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/Sema/SemaCUDA.cpp      39
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp      12
-rw-r--r--  clang/lib/Sema/SemaTemplate.cpp  53
3 files changed, 71 insertions, 33 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 5e6d0e3e53b..6f272ec839f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -93,8 +93,17 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
return CFT_Host;
}
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+ return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+ return isa<A>(Attribute) &&
+ !(IgnoreImplicitAttr && Attribute->isImplicit());
+ });
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr) {
// Code that lives outside a function is run on the host.
if (D == nullptr)
return CFT_Host;
@@ -105,13 +114,13 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
if (D->hasAttr<CUDAGlobalAttr>())
return CFT_Global;
- if (D->hasAttr<CUDADeviceAttr>()) {
- if (D->hasAttr<CUDAHostAttr>())
+ if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+ if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
return CFT_HostDevice;
return CFT_Device;
- } else if (D->hasAttr<CUDAHostAttr>()) {
+ } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit()) {
+ } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
@@ -856,7 +865,7 @@ void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
}
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
- LookupResult &Previous) {
+ const LookupResult &Previous) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
for (NamedDecl *OldND : Previous) {
@@ -883,3 +892,21 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
}
}
}
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+ const FunctionDecl &TemplateFD) {
+ if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+ AttrTy *Clone = Attribute->clone(S.Context);
+ Clone->setInherited(true);
+ FD->addAttr(Clone);
+ }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+ const FunctionTemplateDecl &TD) {
+ const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+ copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}
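To summarize what the new IgnoreImplicitHDAttr flag changes in user-visible terms, here is a hypothetical CUDA fragment (function names invented for illustration) annotated with the targets IdentifyCUDATarget() would report:

__host__ __device__ void explicit_hd();
// Explicit attributes: CFT_HostDevice whether or not implicit attributes
// are ignored.

constexpr int implicit_hd(int x) { return x; }
// Under the default options this picks up implicit __host__ __device__
// attributes, so IdentifyCUDATarget(D) is CFT_HostDevice, while
// IdentifyCUDATarget(D, /*IgnoreImplicitHDAttr=*/true) is CFT_Host because
// hasAttr<>() above skips attributes marked implicit.

void plain_host();
// No attributes: CFT_Host either way.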
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index be2466c9235..5d13c8fa039 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8305,9 +8305,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
- if (getLangOpts().CUDA)
- maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
-
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.
@@ -8410,6 +8407,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
}
+ // We do not add HD attributes to specializations here because
+ // they may have different constexpr-ness compared to their
+ // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+ // may end up with different effective targets. Instead, a
+ // specialization inherits its target attributes from its template
+ // in the CheckFunctionTemplateSpecialization() call below.
+ if (getLangOpts().CUDA && !isFunctionTemplateSpecialization)
+ maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+
// If it's a friend (and only if it's a friend), it's possible
// that either the specialized function type or the specialized
// template is dependent, and therefore matching will fail. In
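A hypothetical example of the case this guard is for: an explicit specialization that is not constexpr even though its primary template is. Running maybeAddCUDAHostDeviceAttrs() on the specialization itself would therefore give it a different effective target than its template, so the specialization instead inherits the template's target attributes in the CheckFunctionTemplateSpecialization() call mentioned in the comment above.

template <typename T>
constexpr T zero() { return T(); }   // implicitly __host__ __device__ by default

// Not constexpr, so the HD heuristic would not apply to it; it now inherits
// the (possibly implicit) HD attributes of the primary template through
// inheritCUDATargetAttrs() instead.
template <>
int zero<int>() { return 0; }

int main() { return zero<int>(); }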
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 898765cbd79..4846b25232a 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7043,13 +7043,15 @@ bool Sema::CheckFunctionTemplateSpecialization(
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take
- // target attributes into account, we perform target match check
- // here and reject candidates that have different target.
+ // Target attributes are part of the cuda function signature, so
+ // the deduced template's cuda target must match that of the
+ // specialization. Given that C++ template deduction does not
+ // take target attributes into account, we reject candidates
+ // here that have a different target.
if (LangOpts.CUDA &&
- IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
FailedCandidates.addCandidate().set(
I.getPair(), FunTmpl->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
@@ -7166,6 +7168,14 @@ bool Sema::CheckFunctionTemplateSpecialization(
SpecInfo->getTemplateSpecializationKind(),
ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr);
+ // A function template specialization inherits the target attributes
+ // of its template. (We require the attributes explicitly in the
+ // code to match, but a template may have implicit attributes by
+ // virtue e.g. of being constexpr, and it passes these implicit
+ // attributes on to its specializations.)
+ if (LangOpts.CUDA)
+ inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+
// The "previous declaration" for this function template specialization is
// the prior function template specialization.
Previous.clear();
@@ -8154,24 +8164,19 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take it
- // into account, we perform target match check here and reject
- // candidates that have different target.
- if (LangOpts.CUDA) {
- CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
- // We need to adjust target when HD is forced by
- // #pragma clang force_cuda_host_device
- if (ForceCUDAHostDeviceDepth > 0 &&
- (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
- DeclaratorTarget = CFT_HostDevice;
- if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
- FailedCandidates.addCandidate().set(
- P.getPair(), FunTmpl->getTemplatedDecl(),
- MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
- continue;
- }
+ // Target attributes are part of the cuda function signature, so
+ // the cuda target of the instantiated function must match that of its
+ // template. Given that C++ template deduction does not take
+ // target attributes into account, we reject candidates here that
+ // have a different target.
+ if (LangOpts.CUDA &&
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(Attr)) {
+ FailedCandidates.addCandidate().set(
+ P.getPair(), FunTmpl->getTemplatedDecl(),
+ MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+ continue;
}
Matches.addDecl(Specialization, P.getAccess());
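The corresponding hypothetical picture for explicit instantiation: both sides of the comparison are now classified from explicitly written attributes only, so an instantiation such as the one below is accepted regardless of whether constexpr functions are currently being given implicit __host__ __device__ attributes.

template <typename T>
constexpr T square(T x) { return x * x; }   // may be implicitly __host__ __device__

// No target attributes on the instantiation: the declarator is CFT_Host, and
// the instantiated function is also CFT_Host once its implicit HD attributes
// are ignored, so the targets match under any combination of options.
template int square<int>(int);

int main() { return square(1) - 1; }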