[CUDA][HIP][Sema] Fix template kernel with function as template parameter

If a kernel template has a function as its template parameter, a device function should be allowed as template argument since a kernel can call a device function. However, currently if the kernel template is instantiated in a host function, clang will emit an error message saying the device function is an invalid candidate for the template parameter. This happens because clang checks the reference to the device function during parsing the template arguments. At this point, the template is not instantiated yet. Clang incorrectly assumes the device function is called by the host function and emits the error message. This patch fixes the issue by disabling checking of device function during parsing template arguments and deferring the check to the instantion of the template. At that point, the template decl is already available, therefore the check can be done against the instantiated function template decl. Differential Revision: https://reviews.llvm.org/D56411 llvm-svn: 355421
author: Yaxun Liu <Yaxun.Liu@amd.com> 2019-03-05 18:19:35 +0000
committer: Yaxun Liu <Yaxun.Liu@amd.com> 2019-03-05 18:19:35 +0000
commit: c5be267003ef482864aa301bdf97eab5829cd2a2 (patch)
tree: a6037a4c5ec22112a82d635ed24ab5347f4691c9
parent: add2d2e30401c38ec14231ce78efd14bb7dc1ff6 (diff)
download: bcm5719-llvm-c5be267003ef482864aa301bdf97eab5829cd2a2.tar.gz
bcm5719-llvm-c5be267003ef482864aa301bdf97eab5829cd2a2.zip
4 files changed, 16 insertions, 3 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 5aa788eda2d..d062e8b201a 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -675,6 +675,11 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");
+
+  auto &ExprEvalCtx = ExprEvalContexts.back();
+  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
+    return true;
+
   // FIXME: Is bailing out early correct here?  Should we instead assume that
   // the caller is a global initializer?
   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 157ef0b9426..19ed62d1fe6 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14799,6 +14799,9 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
     ResolveExceptionSpec(Loc, FPT);
 
+  if (getLangOpts().CUDA)
+    CheckCUDACall(Loc, Func);
+
   // If we don't need to mark the function as used, and we don't need to
   // try to provide a definition, there's nothing more to do.
   if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&
diff --git a/clang/test/SemaCUDA/call-device-fn-from-host.cu b/clang/test/SemaCUDA/call-device-fn-from-host.cu
index 26215d581d2..ba1ce860204 100644
--- a/clang/test/SemaCUDA/call-device-fn-from-host.cu
+++ b/clang/test/SemaCUDA/call-device-fn-from-host.cu
@@ -37,7 +37,7 @@ __host__ __device__ void T::hd3() {
 }
 
 template <typename T> __host__ __device__ void hd2() { device_fn(); }
-// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
 void host_fn() { hd2<int>(); }
 
 __host__ __device__ void hd() { device_fn(); }
@@ -90,3 +90,8 @@ __host__ __device__ void fn_ptr_template() {
 static __host__ __device__ void hd_func() { device_fn(); }
 __global__ void kernel() { hd_func(); }
 void host_func(void) { kernel<<<1, 1>>>(); }
+
+// Should allow host function call kernel template with device function argument.
+__device__ void f();
+template<void(*F)()> __global__ void t() { F(); }
+__host__ void g() { t<f><<<1,1>>>(); }
diff --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu b/clang/test/SemaCUDA/call-host-fn-from-device.cu
index acdd291b664..c5bbd63d8e0 100644
--- a/clang/test/SemaCUDA/call-host-fn-from-device.cu
+++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu
@@ -56,14 +56,14 @@ __host__ __device__ void T::hd3() {
 }
 
 template <typename T> __host__ __device__ void hd2() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __global__ void kernel() { hd2<int>(); }
 
 __host__ __device__ void hd() { host_fn(); }
 // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 
 template <typename T> __host__ __device__ void hd3() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __device__ void device_fn() { hd3<int>(); }
 
 // No error because this is never instantiated.
author	Yaxun Liu <Yaxun.Liu@amd.com>	2019-03-05 18:19:35 +0000
committer	Yaxun Liu <Yaxun.Liu@amd.com>	2019-03-05 18:19:35 +0000
commit	c5be267003ef482864aa301bdf97eab5829cd2a2 (patch)
tree	a6037a4c5ec22112a82d635ed24ab5347f4691c9
parent	add2d2e30401c38ec14231ce78efd14bb7dc1ff6 (diff)
download	bcm5719-llvm-c5be267003ef482864aa301bdf97eab5829cd2a2.tar.gz bcm5719-llvm-c5be267003ef482864aa301bdf97eab5829cd2a2.zip