Diffstat (limited to 'clang')
-rw-r--r--  clang/include/clang/Basic/DiagnosticSemaKinds.td        |   1
-rw-r--r--  clang/include/clang/Sema/Sema.h                         |  81
-rw-r--r--  clang/lib/Sema/SemaCUDA.cpp                             | 142
-rw-r--r--  clang/test/SemaCUDA/bad-calls-on-same-line.cu           |   2
-rw-r--r--  clang/test/SemaCUDA/call-device-fn-from-host.cu         |   3
-rw-r--r--  clang/test/SemaCUDA/call-host-fn-from-device.cu         |   2
-rw-r--r--  clang/test/SemaCUDA/call-stack-for-deferred-err.cu      |  18
-rw-r--r--  clang/test/SemaCUDA/exceptions.cu                       |   3
-rw-r--r--  clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu |  17
-rw-r--r--  clang/test/SemaCUDA/trace-through-global.cu             |  10
10 files changed, 197 insertions, 82 deletions
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 407b84f2617..ede1d9e3a08 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6702,6 +6702,7 @@ def err_deleted_function_use : Error<"attempt to use a deleted function">;
def err_deleted_inherited_ctor_use : Error<
"constructor inherited by %0 from base class %1 is implicitly deleted">;
+def note_called_by : Note<"called by %0">;
def err_kern_type_not_void_return : Error<
"kernel function type %0 must have void return type">;
def err_kern_is_nonstatic_method : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 627aa5bfb32..78080f5e4a4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9249,26 +9249,42 @@ public:
/// Diagnostics that are emitted only if we discover that the given function
/// must be codegen'ed. Because handling these correctly adds overhead to
/// compilation, this is currently only enabled for CUDA compilations.
- llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>>
+ llvm::DenseMap<CanonicalDeclPtr<FunctionDecl>,
+ std::vector<PartialDiagnosticAt>>
CUDADeferredDiags;
/// FunctionDecls plus raw encodings of SourceLocations for which
/// CheckCUDACall has emitted a (maybe deferred) "bad call" diagnostic. We
/// use this to avoid emitting the same deferred diag twice.
- llvm::DenseSet<std::pair<FunctionDecl *, unsigned>> LocsWithCUDACallDiags;
+ llvm::DenseSet<std::pair<CanonicalDeclPtr<FunctionDecl>, unsigned>>
+ LocsWithCUDACallDiags;
- /// The set of CUDA functions that we've discovered must be emitted by tracing
- /// the call graph. Functions that we can tell a priori must be emitted
- /// aren't added to this set.
- llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns;
+ /// A pair of a canonical FunctionDecl and a SourceLocation.
+ struct FunctionDeclAndLoc {
+ CanonicalDeclPtr<FunctionDecl> FD;
+ SourceLocation Loc;
+ };
+
+ /// An inverse call graph, mapping known-emitted functions to one of their
+ /// known-emitted callers (plus the location of the call).
+ ///
+ /// Functions that we can tell a priori must be emitted aren't added to this
+ /// map.
+ llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>,
+ /* Caller = */ FunctionDeclAndLoc>
+ CUDAKnownEmittedFns;
/// A partial call graph maintained during CUDA compilation to support
- /// deferred diagnostics. Specifically, functions are only added here if, at
- /// the time they're added, they are not known-emitted. As soon as we
- /// discover that a function is known-emitted, we remove it and everything it
- /// transitively calls from this set and add those functions to
- /// CUDAKnownEmittedFns.
- llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph;
+ /// deferred diagnostics.
+ ///
+ /// Functions are only added here if, at the time they're considered, they are
+ /// not known-emitted. As soon as we discover that a function is
+ /// known-emitted, we remove it and everything it transitively calls from this
+ /// set and add those functions to CUDAKnownEmittedFns.
+ llvm::DenseMap</* Caller = */ CanonicalDeclPtr<FunctionDecl>,
+ /* Callees = */ llvm::MapVector<CanonicalDeclPtr<FunctionDecl>,
+ SourceLocation>>
+ CUDACallGraph;
/// Diagnostic builder for CUDA errors which may or may not be deferred.
///
@@ -9291,13 +9307,19 @@ public:
K_Nop,
/// Emit the diagnostic immediately (i.e., behave like Sema::Diag()).
K_Immediate,
+ /// Emit the diagnostic immediately, and, if it's a warning or error, also
+ /// emit a call stack showing how this function can be reached by an a
+ /// priori known-emitted function.
+ K_ImmediateWithCallStack,
/// Create a deferred diagnostic, which is emitted only if the function
- /// it's attached to is codegen'ed.
+ /// it's attached to is codegen'ed. Also emit a call stack as with
+ /// K_ImmediateWithCallStack.
K_Deferred
};
CUDADiagBuilder(Kind K, SourceLocation Loc, unsigned DiagID,
FunctionDecl *Fn, Sema &S);
+ ~CUDADiagBuilder();
/// Convertible to bool: True if we immediately emitted an error, false if
/// we didn't emit an error or we created a deferred error.
@@ -9309,38 +9331,29 @@ public:
///
/// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably
/// want to use these instead of creating a CUDADiagBuilder yourself.
- operator bool() const { return ImmediateDiagBuilder.hasValue(); }
+ operator bool() const { return ImmediateDiag.hasValue(); }
template <typename T>
friend const CUDADiagBuilder &operator<<(const CUDADiagBuilder &Diag,
const T &Value) {
- if (Diag.ImmediateDiagBuilder.hasValue())
- *Diag.ImmediateDiagBuilder << Value;
- else if (Diag.PartialDiagInfo.hasValue())
- Diag.PartialDiagInfo->PD << Value;
+ if (Diag.ImmediateDiag.hasValue())
+ *Diag.ImmediateDiag << Value;
+ else if (Diag.PartialDiag.hasValue())
+ *Diag.PartialDiag << Value;
return Diag;
}
private:
- struct PartialDiagnosticInfo {
- PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD,
- FunctionDecl *Fn)
- : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {}
-
- ~PartialDiagnosticInfo() {
- S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)});
- }
-
- Sema &S;
- SourceLocation Loc;
- PartialDiagnostic PD;
- FunctionDecl *Fn;
- };
+ Sema &S;
+ SourceLocation Loc;
+ unsigned DiagID;
+ FunctionDecl *Fn;
+ bool ShowCallStack;
// Invariant: At most one of these Optionals has a value.
// FIXME: Switch these to a Variant once that exists.
- llvm::Optional<Sema::SemaDiagnosticBuilder> ImmediateDiagBuilder;
- llvm::Optional<PartialDiagnosticInfo> PartialDiagInfo;
+ llvm::Optional<SemaDiagnosticBuilder> ImmediateDiag;
+ llvm::Optional<PartialDiagnostic> PartialDiag;
};
/// Creates a CUDADiagBuilder that emits the diagnostic if the current context
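To make the new bookkeeping concrete, here is a minimal standalone sketch of the inverse-call-graph idea behind CUDAKnownEmittedFns. The names (FunctionAndLine, KnownEmitted) and the use of std::string/line numbers are hypothetical stand-ins for Sema's CanonicalDeclPtr<FunctionDecl> and SourceLocation; the point is only that each known-emitted callee records one known-emitted caller plus the call site, so a full call stack can be recovered later by repeated lookups:

// Hypothetical stand-in for Sema::FunctionDeclAndLoc: one caller + call site.
#include <cstdio>
#include <map>
#include <string>

struct FunctionAndLine {
  std::string Caller;
  unsigned Line;
};

int main() {
  // Inverse call graph: callee -> (one known-emitted caller, call location),
  // mirroring the shape of CUDAKnownEmittedFns above. "kernel" has no entry
  // because it is a priori known-emitted.
  std::map<std::string, FunctionAndLine> KnownEmitted = {
      {"hd_fn", {"device_fn2", 12}},
      {"device_fn2", {"kernel", 14}},
  };

  // Recover the call stack for hd_fn by following caller links until we
  // reach a function with no recorded caller, emitting one note per hop --
  // the same walk EmitCallStackNotes() performs in SemaCUDA.cpp below.
  for (auto It = KnownEmitted.find("hd_fn"); It != KnownEmitted.end();
       It = KnownEmitted.find(It->second.Caller))
    std::printf("note: called by '%s' (line %u)\n", It->second.Caller.c_str(),
                It->second.Line);
}

Running the sketch prints "note: called by 'device_fn2'" and then "note: called by 'kernel'", i.e. exactly the chain of notes the diagnostics in the tests below expect.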
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 423aef370ba..7e05cc86ba1 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -488,22 +488,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
-Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
- unsigned DiagID, FunctionDecl *Fn,
- Sema &S) {
- switch (K) {
- case K_Nop:
- break;
- case K_Immediate:
- ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID));
- break;
- case K_Deferred:
- assert(Fn && "Must have a function to attach the deferred diag to.");
- PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
- break;
- }
-}
-
// In CUDA, there are some constructs which may appear in semantically-valid
// code, but trigger errors if we ever generate code for the function in which
// they appear. Essentially every construct you're not allowed to use on the
@@ -528,6 +512,54 @@ Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
// until we discover that the function is known-emitted, at which point we take
// it out of this map and emit the diagnostic.
+Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
+ unsigned DiagID, FunctionDecl *Fn,
+ Sema &S)
+ : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
+ ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
+ switch (K) {
+ case K_Nop:
+ break;
+ case K_Immediate:
+ case K_ImmediateWithCallStack:
+ ImmediateDiag.emplace(S.Diag(Loc, DiagID));
+ break;
+ case K_Deferred:
+ assert(Fn && "Must have a function to attach the deferred diag to.");
+ PartialDiag.emplace(S.PDiag(DiagID));
+ break;
+ }
+}
+
+// Print notes showing how we can reach FD starting from an a priori
+// known-callable function.
+static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
+ auto FnIt = S.CUDAKnownEmittedFns.find(FD);
+ while (FnIt != S.CUDAKnownEmittedFns.end()) {
+ DiagnosticBuilder Builder(
+ S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
+ Builder << FnIt->second.FD;
+ Builder.setForceEmit();
+
+ FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
+ }
+}
+
+Sema::CUDADiagBuilder::~CUDADiagBuilder() {
+ if (ImmediateDiag) {
+ // Emit our diagnostic and, if it was a warning or error, output a callstack
+ // if Fn isn't a priori known-emitted.
+ bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
+ DiagID, Loc) >= DiagnosticsEngine::Warning;
+ ImmediateDiag.reset(); // Emit the immediate diag.
+ if (IsWarningOrError && ShowCallStack)
+ EmitCallStackNotes(S, Fn);
+ } else if (PartialDiag) {
+ assert(ShowCallStack && "Must always show call stack for deferred diags.");
+ S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
+ }
+}
+
// Do we know that we will eventually codegen the given function?
static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
// Templates are emitted when they're instantiated.
@@ -568,7 +600,7 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
// mode until the function is known-emitted.
if (getLangOpts().CUDAIsDevice) {
return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
- ? CUDADiagBuilder::K_Immediate
+ ? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
}
return CUDADiagBuilder::K_Nop;
@@ -596,7 +628,7 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
return CUDADiagBuilder::K_Nop;
return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
- ? CUDADiagBuilder::K_Immediate
+ ? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
default:
return CUDADiagBuilder::K_Nop;
@@ -612,63 +644,84 @@ static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
auto It = S.CUDADeferredDiags.find(FD);
if (It == S.CUDADeferredDiags.end())
return;
+ bool HasWarningOrError = false;
for (PartialDiagnosticAt &PDAt : It->second) {
const SourceLocation &Loc = PDAt.first;
const PartialDiagnostic &PD = PDAt.second;
+ HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
+ PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
Builder.setForceEmit();
PD.Emit(Builder);
}
S.CUDADeferredDiags.erase(It);
+
+ // FIXME: Should this be called after every warning/error emitted in the loop
+ // above, instead of just once per function? That would be consistent with
+ // how we handle immediate errors, but it also seems like a bit much.
+ if (HasWarningOrError)
+ EmitCallStackNotes(S, FD);
}
// Indicate that this function (and thus everything it transitively calls) will
// be codegen'ed, and emit any deferred diagnostics on this function and its
// (transitive) callees.
-static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
+static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
+ FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
// Nothing to do if we already know that FD is emitted.
- if (IsKnownEmitted(S, FD)) {
- assert(!S.CUDACallGraph.count(FD));
+ if (IsKnownEmitted(S, OrigCallee)) {
+ assert(!S.CUDACallGraph.count(OrigCallee));
return;
}
- // We've just discovered that FD is known-emitted. Walk our call graph to see
- // what else we can now discover also must be emitted.
- llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
- llvm::SmallSet<FunctionDecl *, 4> Seen;
- Seen.insert(FD);
+ // We've just discovered that OrigCallee is known-emitted. Walk our call
+ // graph to see what else we can now discover also must be emitted.
+
+ struct CallInfo {
+ FunctionDecl *Caller;
+ FunctionDecl *Callee;
+ SourceLocation Loc;
+ };
+ llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
+ llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
+ Seen.insert(OrigCallee);
while (!Worklist.empty()) {
- FunctionDecl *Caller = Worklist.pop_back_val();
- assert(!IsKnownEmitted(S, Caller) &&
+ CallInfo C = Worklist.pop_back_val();
+ assert(!IsKnownEmitted(S, C.Callee) &&
"Worklist should not contain known-emitted functions.");
- S.CUDAKnownEmittedFns.insert(Caller);
- EmitDeferredDiags(S, Caller);
+ S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
+ EmitDeferredDiags(S, C.Callee);
// If this is a template instantiation, explore its callgraph as well:
// Non-dependent calls are part of the template's callgraph, while dependent
// calls are part of the instantiation's call graph.
- if (auto *Templ = Caller->getPrimaryTemplate()) {
+ if (auto *Templ = C.Callee->getPrimaryTemplate()) {
FunctionDecl *TemplFD = Templ->getAsFunction();
if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
Seen.insert(TemplFD);
- Worklist.push_back(TemplFD);
+ Worklist.push_back(
+ {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
}
}
- // Add all functions called by Caller to our worklist.
- auto CGIt = S.CUDACallGraph.find(Caller);
+ // Add all functions called by Callee to our worklist.
+ auto CGIt = S.CUDACallGraph.find(C.Callee);
if (CGIt == S.CUDACallGraph.end())
continue;
- for (FunctionDecl *Callee : CGIt->second) {
- if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
+ for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
+ CGIt->second) {
+ FunctionDecl *NewCallee = FDLoc.first;
+ SourceLocation CallLoc = FDLoc.second;
+ if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
continue;
- Seen.insert(Callee);
- Worklist.push_back(Callee);
+ Seen.insert(NewCallee);
+ Worklist.push_back(
+ {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
}
- // Caller is now known-emitted, so we no longer need to maintain its list of
- // callees in CUDACallGraph.
+ // C.Callee is now known-emitted, so we no longer need to maintain its list
+ // of callees in CUDACallGraph.
S.CUDACallGraph.erase(CGIt);
}
}
@@ -686,7 +739,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
if (CallerKnownEmitted)
- MarkKnownEmitted(*this, Callee);
+ MarkKnownEmitted(*this, Caller, Callee, Loc);
else {
// If we have
// host fn calls kernel fn calls host+device,
@@ -695,7 +748,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
- CUDACallGraph[Caller].insert(Callee);
+ CUDACallGraph[Caller].insert({Callee, Loc});
}
CUDADiagBuilder::Kind DiagKind = [&] {
@@ -707,7 +760,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
// If we know the caller will be emitted, we know this wrong-side call
// will be emitted, so it's an immediate error. Otherwise, defer the
// error until we know the caller is emitted.
- return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
+ return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
: CUDADiagBuilder::K_Deferred;
default:
return CUDADiagBuilder::K_Nop;
@@ -729,7 +782,8 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
Caller, *this)
<< Callee;
- return DiagKind != CUDADiagBuilder::K_Immediate;
+ return DiagKind != CUDADiagBuilder::K_Immediate &&
+ DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
}
void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
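The tests below exercise this machinery end to end. As a rough illustration of the scenario CheckCUDACall now handles (a sketch modeled on the tests, not additional test content; exact diagnostic wording comes from DiagnosticSemaKinds.td), a wrong-side call inside a host+device function is deferred until some known-emitted function reaches it, at which point the error is flushed along with the new "called by" notes:

// Sketch: device-side compilation (-fcuda-is-device), using the same
// "Inputs/cuda.h" attribute macros as the tests below.
#include "Inputs/cuda.h"

__host__ void host_only();

inline __host__ __device__ void hd() {
  host_only(); // Wrong-side call, but hd() is not yet known-emitted, so the
               // error is parked in CUDADeferredDiags[hd] rather than issued.
}

__global__ void kernel() {
  hd(); // kernel() is a priori known-emitted, so MarkKnownEmitted() walks the
        // CUDACallGraph edge to hd(), flushes its deferred error, and
        // EmitCallStackNotes() appends: note: called by 'kernel'.
}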
diff --git a/clang/test/SemaCUDA/bad-calls-on-same-line.cu b/clang/test/SemaCUDA/bad-calls-on-same-line.cu
index e91baff5d28..53d5e128234 100644
--- a/clang/test/SemaCUDA/bad-calls-on-same-line.cu
+++ b/clang/test/SemaCUDA/bad-calls-on-same-line.cu
@@ -35,5 +35,7 @@ inline __host__ __device__ void hd() {
void host_fn() {
hd<int>();
hd<double>(); // expected-note {{function template specialization 'hd<double>'}}
+ // expected-note@-1 {{called by 'host_fn'}}
hd<float>(); // expected-note {{function template specialization 'hd<float>'}}
+ // expected-note@-1 {{called by 'host_fn'}}
}
diff --git a/clang/test/SemaCUDA/call-device-fn-from-host.cu b/clang/test/SemaCUDA/call-device-fn-from-host.cu
index ab88338b80d..dc227226101 100644
--- a/clang/test/SemaCUDA/call-device-fn-from-host.cu
+++ b/clang/test/SemaCUDA/call-device-fn-from-host.cu
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
+// RUN: -verify -verify-ignore-unexpected=note
// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.
diff --git a/clang/test/SemaCUDA/call-host-fn-from-device.cu b/clang/test/SemaCUDA/call-host-fn-from-device.cu
index bb6ea230fa2..d484af14172 100644
--- a/clang/test/SemaCUDA/call-host-fn-from-device.cu
+++ b/clang/test/SemaCUDA/call-host-fn-from-device.cu
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \
-// RUN: -emit-llvm -o /dev/null -verify
+// RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note
// Note: This test won't work with -fsyntax-only, because some of these errors
// are emitted during codegen.
diff --git a/clang/test/SemaCUDA/call-stack-for-deferred-err.cu b/clang/test/SemaCUDA/call-stack-for-deferred-err.cu
new file mode 100644
index 00000000000..ddcaabf4ef5
--- /dev/null
+++ b/clang/test/SemaCUDA/call-stack-for-deferred-err.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+// We should emit an error for hd_fn's use of a VLA. This would have been
+// legal if hd_fn were never codegen'ed on the device, so we should also print
+// out a callstack showing how we determine that hd_fn is known-emitted.
+//
+// Compare to no-call-stack-for-deferred-err.cu.
+
+inline __host__ __device__ void hd_fn(int n);
+inline __device__ void device_fn2() { hd_fn(42); } // expected-note {{called by 'device_fn2'}}
+
+__global__ void kernel() { device_fn2(); } // expected-note {{called by 'kernel'}}
+
+inline __host__ __device__ void hd_fn(int n) {
+ int vla[n]; // expected-error {{variable-length array}}
+}
diff --git a/clang/test/SemaCUDA/exceptions.cu b/clang/test/SemaCUDA/exceptions.cu
index 73d2b9d084e..49568ecac7e 100644
--- a/clang/test/SemaCUDA/exceptions.cu
+++ b/clang/test/SemaCUDA/exceptions.cu
@@ -50,3 +50,6 @@ inline __host__ __device__ void hd3() {
}
__device__ void call_hd3() { hd3(); }
+#ifdef __CUDA_ARCH__
+// expected-note@-2 {{called by 'call_hd3'}}
+#endif
diff --git a/clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu b/clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu
new file mode 100644
index 00000000000..6dc98695c1e
--- /dev/null
+++ b/clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+// Here we should dump an error about the VLA in device_fn, but we should not
+// print a callstack indicating how device_fn becomes known-emitted, because
+// it's an error to use a VLA in any __device__ function, even one that doesn't
+// get emitted.
+
+inline __device__ void device_fn(int n);
+inline __device__ void device_fn2() { device_fn(42); }
+
+__global__ void kernel() { device_fn2(); }
+
+inline __device__ void device_fn(int n) {
+ int vla[n]; // expected-error {{variable-length array}}
+}
diff --git a/clang/test/SemaCUDA/trace-through-global.cu b/clang/test/SemaCUDA/trace-through-global.cu
index 7a9b8dc72b5..065342fdd11 100644
--- a/clang/test/SemaCUDA/trace-through-global.cu
+++ b/clang/test/SemaCUDA/trace-through-global.cu
@@ -35,10 +35,16 @@ __global__ void kernel(int) { hd2(); }
template <typename T>
void launch_kernel() {
kernel<<<0, 0>>>(T());
- hd1();
- hd3(T());
+
+ // Notice that these two diagnostics are different: Because the call to hd1
+ // is not dependent on T, the call to hd1 comes from 'launch_kernel', while
+ // the call to hd3, being dependent, comes from 'launch_kernel<int>'.
+ hd1(); // expected-note {{called by 'launch_kernel'}}
+ hd3(T()); // expected-note {{called by 'launch_kernel<int>'}}
}
void host_fn() {
launch_kernel<int>();
+ // expected-note@-1 {{called by 'host_fn'}}
+ // expected-note@-2 {{called by 'host_fn'}}
}