summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Sema/Sema.h8
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp39
-rw-r--r--clang/lib/Sema/SemaDecl.cpp12
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp53
-rw-r--r--clang/test/SemaCUDA/function-template-overload.cu41
5 files changed, 113 insertions, 40 deletions
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index fcc0d53b2d4..0523282e0a3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9420,7 +9420,8 @@ public:
///
/// Use this rather than examining the function's attributes yourself -- you
/// will get it wrong. Returns CFT_Host if D is null.
- CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
+ CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr = false);
CUDAFunctionTarget IdentifyCUDATarget(const AttributeList *Attr);
/// Gets the CUDA target for the current context.
@@ -9522,7 +9523,10 @@ public:
/// Check whether NewFD is a valid overload for CUDA. Emits
/// diagnostics and invalidates NewFD if not.
- void checkCUDATargetOverload(FunctionDecl *NewFD, LookupResult &Previous);
+ void checkCUDATargetOverload(FunctionDecl *NewFD,
+ const LookupResult &Previous);
+ /// Copies target attributes from the template TD to the function FD.
+ void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
/// \name Code completion
//@{
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 5e6d0e3e53b..6f272ec839f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -93,8 +93,17 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
return CFT_Host;
}
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+ return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+ return isa<A>(Attribute) &&
+ !(IgnoreImplicitAttr && Attribute->isImplicit());
+ });
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+ bool IgnoreImplicitHDAttr) {
// Code that lives outside a function is run on the host.
if (D == nullptr)
return CFT_Host;
@@ -105,13 +114,13 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
if (D->hasAttr<CUDAGlobalAttr>())
return CFT_Global;
- if (D->hasAttr<CUDADeviceAttr>()) {
- if (D->hasAttr<CUDAHostAttr>())
+ if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+ if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
return CFT_HostDevice;
return CFT_Device;
- } else if (D->hasAttr<CUDAHostAttr>()) {
+ } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
return CFT_Host;
- } else if (D->isImplicit()) {
+ } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;
@@ -856,7 +865,7 @@ void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
}
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
- LookupResult &Previous) {
+ const LookupResult &Previous) {
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
for (NamedDecl *OldND : Previous) {
@@ -883,3 +892,21 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
}
}
}
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+ const FunctionDecl &TemplateFD) {
+ if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+ AttrTy *Clone = Attribute->clone(S.Context);
+ Clone->setInherited(true);
+ FD->addAttr(Clone);
+ }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+ const FunctionTemplateDecl &TD) {
+ const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+ copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+ copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index be2466c9235..5d13c8fa039 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8305,9 +8305,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
- if (getLangOpts().CUDA)
- maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
-
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.
@@ -8410,6 +8407,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
TemplateArgs.setRAngleLoc(D.getIdentifierLoc());
}
+ // We do not add HD attributes to specializations here because
+ // they may have different constexpr-ness compared to their
+ // templates and, after maybeAddCUDAHostDeviceAttrs() is applied,
+ // may end up with different effective targets. Instead, a
+ // specialization inherits its target attributes from its template
+ // in the CheckFunctionTemplateSpecialization() call below.
+ if (getLangOpts().CUDA & !isFunctionTemplateSpecialization)
+ maybeAddCUDAHostDeviceAttrs(NewFD, Previous);
+
// If it's a friend (and only if it's a friend), it's possible
// that either the specialized function type or the specialized
// template is dependent, and therefore matching will fail. In
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 898765cbd79..4846b25232a 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7043,13 +7043,15 @@ bool Sema::CheckFunctionTemplateSpecialization(
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take
- // target attributes into account, we perform target match check
- // here and reject candidates that have different target.
+ // Target attributes are part of the cuda function signature, so
+ // the deduced template's cuda target must match that of the
+ // specialization. Given that C++ template deduction does not
+ // take target attributes into account, we reject candidates
+ // here that have a different target.
if (LangOpts.CUDA &&
- IdentifyCUDATarget(Specialization) != IdentifyCUDATarget(FD)) {
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
FailedCandidates.addCandidate().set(
I.getPair(), FunTmpl->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
@@ -7166,6 +7168,14 @@ bool Sema::CheckFunctionTemplateSpecialization(
SpecInfo->getTemplateSpecializationKind(),
ExplicitTemplateArgs ? &ConvertedTemplateArgs[Specialization] : nullptr);
+ // A function template specialization inherits the target attributes
+ // of its template. (We require the attributes explicitly in the
+ // code to match, but a template may have implicit attributes by
+ // virtue e.g. of being constexpr, and it passes these implicit
+ // attributes on to its specializations.)
+ if (LangOpts.CUDA)
+ inheritCUDATargetAttrs(FD, *Specialization->getPrimaryTemplate());
+
// The "previous declaration" for this function template specialization is
// the prior function template specialization.
Previous.clear();
@@ -8154,24 +8164,19 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
continue;
}
- // Target attributes are part of function signature during cuda
- // compilation, so deduced template must also have matching CUDA
- // target. Given that regular template deduction does not take it
- // into account, we perform target match check here and reject
- // candidates that have different target.
- if (LangOpts.CUDA) {
- CUDAFunctionTarget DeclaratorTarget = IdentifyCUDATarget(Attr);
- // We need to adjust target when HD is forced by
- // #pragma clang force_cuda_host_device
- if (ForceCUDAHostDeviceDepth > 0 &&
- (DeclaratorTarget == CFT_Device || DeclaratorTarget == CFT_Host))
- DeclaratorTarget = CFT_HostDevice;
- if (IdentifyCUDATarget(Specialization) != DeclaratorTarget) {
- FailedCandidates.addCandidate().set(
- P.getPair(), FunTmpl->getTemplatedDecl(),
- MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
- continue;
- }
+ // Target attributes are part of the cuda function signature, so
+ // the cuda target of the instantiated function must match that of its
+ // template. Given that C++ template deduction does not take
+ // target attributes into account, we reject candidates here that
+ // have a different target.
+ if (LangOpts.CUDA &&
+ IdentifyCUDATarget(Specialization,
+ /* IgnoreImplicitHDAttributes = */ true) !=
+ IdentifyCUDATarget(Attr)) {
+ FailedCandidates.addCandidate().set(
+ P.getPair(), FunTmpl->getTemplatedDecl(),
+ MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
+ continue;
}
Matches.addDecl(Specialization, P.getAccess());
diff --git a/clang/test/SemaCUDA/function-template-overload.cu b/clang/test/SemaCUDA/function-template-overload.cu
index 8adeb849570..56bba653958 100644
--- a/clang/test/SemaCUDA/function-template-overload.cu
+++ b/clang/test/SemaCUDA/function-template-overload.cu
@@ -31,7 +31,8 @@ template <> __device__ DType overload_h_d(long a); // OK. instantiates D
template <> __host__ HType overload_h_d(long a); // OK. instantiates H
-// Can't overload HD template with H or D template, though functions are OK.
+// Can't overload HD template with H or D template, though
+// non-template functions are OK.
template <typename T> __host__ __device__ HDType overload_hd(T a) { return HDType(); }
// expected-note@-1 {{previous declaration is here}}
// expected-note@-2 2 {{candidate template ignored: could not match 'HDType' against 'HType'}}
@@ -56,24 +57,54 @@ template <typename T> __host__ HType overload_h_d2(T a) { return HType(); }
template <typename T> __host__ __device__ HDType overload_h_d2(T a) { return HDType(); }
template <typename T1, typename T2 = int> __device__ DType overload_h_d2(T1 a) { T1 x; T2 y; return DType(); }
+// constexpr functions are implicitly HD, but explicit
+// instantiation/specialization must use target attributes as written.
+template <typename T> constexpr T overload_ce_implicit_hd(T a) { return a+1; }
+// expected-note@-1 3 {{candidate template ignored: target attributes do not match}}
+
+// These will not match the template.
+template __host__ __device__ int overload_ce_implicit_hd(int a);
+// expected-error@-1 {{explicit instantiation of 'overload_ce_implicit_hd' does not refer to a function template, variable template, member function, member class, or static data member}}
+template <> __host__ __device__ long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+template <> __host__ __device__ constexpr long overload_ce_implicit_hd(long a);
+// expected-error@-1 {{no function template matches function template specialization 'overload_ce_implicit_hd'}}
+
+// These should work, because template matching ignores the implicit
+// HD attributes the compiler gives to constexpr functions/templates,
+// so 'overload_ce_implicit_hd' template will match __host__ functions
+// only.
+template __host__ int overload_ce_implicit_hd(int a);
+template <> __host__ long overload_ce_implicit_hd(long a);
+
+template float overload_ce_implicit_hd(float a);
+template <> float* overload_ce_implicit_hd(float *a);
+template <> constexpr double overload_ce_implicit_hd(double a) { return a + 3.0; };
+
__host__ void hf() {
overload_hd(13);
+ overload_ce_implicit_hd('h'); // Implicitly instantiated
+ overload_ce_implicit_hd(1.0f); // Explicitly instantiated
+ overload_ce_implicit_hd(2.0); // Explicitly specialized
HType h = overload_h_d(10);
HType h2i = overload_h_d2<int>(11);
HType h2ii = overload_h_d2<int>(12);
// These should be implicitly instantiated from __host__ template returning HType.
- DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
- DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+ DType d = overload_h_d(20); // expected-error {{no viable conversion from 'HType' to 'DType'}}
+ DType d2i = overload_h_d2<int>(21); // expected-error {{no viable conversion from 'HType' to 'DType'}}
DType d2ii = overload_h_d2<int>(22); // expected-error {{no viable conversion from 'HType' to 'DType'}}
}
__device__ void df() {
overload_hd(23);
+ overload_ce_implicit_hd('d'); // Implicitly instantiated
+ overload_ce_implicit_hd(1.0f); // Explicitly instantiated
+ overload_ce_implicit_hd(2.0); // Explicitly specialized
// These should be implicitly instantiated from __device__ template returning DType.
- HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
- HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+ HType h = overload_h_d(10); // expected-error {{no viable conversion from 'DType' to 'HType'}}
+ HType h2i = overload_h_d2<int>(11); // expected-error {{no viable conversion from 'DType' to 'HType'}}
HType h2ii = overload_h_d2<int>(12); // expected-error {{no viable conversion from 'DType' to 'HType'}}
DType d = overload_h_d(20);
OpenPOWER on IntegriCloud