6 files changed, 18 insertions, 102 deletions
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index abeeaca6850..a64961c3152 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -3628,8 +3628,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     assert(AuxToolChain != nullptr && "No aux toolchain.");
     CmdArgs.push_back("-aux-triple");
     CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
-    CmdArgs.push_back("-fcuda-target-overloads");
-    CmdArgs.push_back("-fcuda-disable-target-call-checks");
   }
 
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 6b2e93f21bf..cc657ed5a3f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1557,15 +1557,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   if (Args.hasArg(OPT_fcuda_is_device))
     Opts.CUDAIsDevice = 1;
 
-  if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device))
-    Opts.CUDAAllowHostCallsFromHostDevice = 1;
-
-  if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
-    Opts.CUDADisableTargetCallChecks = 1;
-
-  if (Args.hasArg(OPT_fcuda_target_overloads))
-    Opts.CUDATargetOverloads = 1;
-
   if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
     Opts.CUDAAllowVariadicFunctions = 1;
 
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index b947d073cf5..69b91eee0cb 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -92,9 +92,6 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
 Sema::CUDAFunctionPreference
 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                              const FunctionDecl *Callee) {
-  assert(getLangOpts().CUDATargetOverloads &&
-         "Should not be called w/o enabled target overloads.");
-
   assert(Callee && "Callee must be valid.");
   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
   CUDAFunctionTarget CallerTarget =
@@ -130,13 +127,11 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
       return CFP_SameSide;
 
-    // We'll allow calls to non-mode-matching functions if target call
-    // checks are disabled. This is needed to avoid complaining about
-    // HD->H calls when we compile for device side and vice versa.
-    if (getLangOpts().CUDADisableTargetCallChecks)
-      return CFP_WrongSide;
-
-    return CFP_Never;
+    // Calls from HD to non-mode-matching functions (i.e., to host functions
+    // when compiling in device mode or to device functions when compiling in
+    // host mode) are allowed at the sema level, but eventually rejected if
+    // they're ever codegened.  TODO: Reject said calls earlier.
+    return CFP_WrongSide;
   }
 
   // (e) Calling across device/host boundary is not something you should do.
@@ -148,74 +143,10 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   llvm_unreachable("All cases should've been handled by now.");
 }
 
-bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
-                           const FunctionDecl *Callee) {
-  // With target overloads enabled, we only disallow calling
-  // combinations with CFP_Never.
-  if (getLangOpts().CUDATargetOverloads)
-    return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
-
-  // The CUDADisableTargetCallChecks short-circuits this check: we assume all
-  // cross-target calls are valid.
-  if (getLangOpts().CUDADisableTargetCallChecks)
-    return false;
-
-  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
-                     CalleeTarget = IdentifyCUDATarget(Callee);
-
-  // If one of the targets is invalid, the check always fails, no matter what
-  // the other target is.
-  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
-    return true;
-
-  // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
-  // Callable from the device only."
-  if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
-    return true;
-
-  // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
-  // Callable from the host only."
-  // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
-  // Callable from the host only."
-  if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
-      (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
-    return true;
-
-  // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
-  // however, in which case the function is compiled for both the host and the
-  // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
-  // paths between host and device."
-  if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
-    // If the caller is implicit then the check always passes.
-    if (Caller->isImplicit()) return false;
-
-    bool InDeviceMode = getLangOpts().CUDAIsDevice;
-    if (!InDeviceMode && CalleeTarget != CFT_Host)
-        return true;
-    if (InDeviceMode && CalleeTarget != CFT_Device) {
-      // Allow host device functions to call host functions if explicitly
-      // requested.
-      if (CalleeTarget == CFT_Host &&
-          getLangOpts().CUDAAllowHostCallsFromHostDevice) {
-        Diag(Caller->getLocation(),
-             diag::warn_host_calls_from_host_device)
-            << Callee->getNameAsString() << Caller->getNameAsString();
-        return false;
-      }
-
-      return true;
-    }
-  }
-
-  return false;
-}
-
 template <typename T>
 static void EraseUnwantedCUDAMatchesImpl(
     Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
     std::function<const FunctionDecl *(const T &)> FetchDecl) {
-  assert(S.getLangOpts().CUDATargetOverloads &&
-         "Should not be called w/o enabled target overloads.");
   if (Matches.size() <= 1)
     return;
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index c8a9b9ac807..1f03735ef94 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -5648,10 +5648,9 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
     if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
       return false;
 
-    // So do CUDA's host/device attributes if overloading is enabled.
-    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
-        (D->template hasAttr<CUDADeviceAttr>() ||
-         D->template hasAttr<CUDAHostAttr>()))
+    // So do CUDA's host/device attributes.
+    if (S.getLangOpts().CUDA && (D->template hasAttr<CUDADeviceAttr>() ||
+                                 D->template hasAttr<CUDAHostAttr>()))
       return false;
   }
   return D->isExternC();
@@ -11667,12 +11666,11 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
       FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
     if (Context.BuiltinInfo.isConst(BuiltinID) && !FD->hasAttr<ConstAttr>())
       FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
-    if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads &&
-        Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
+    if (getLangOpts().CUDA && Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
         !FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAHostAttr>()) {
-      // Assign appropriate attribute depending on CUDA compilation
-      // mode and the target builtin belongs to. E.g. during host
-      // compilation, aux builtins are __device__, the rest are __host__.
+      // Add the appropriate attribute, depending on the CUDA compilation mode
+      // and which target the builtin belongs to. For example, during host
+      // compilation, aux builtins are __device__, while the rest are __host__.
       if (getLangOpts().CUDAIsDevice !=
           Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
         FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation()));
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 73d87b09804..4a8c9c95cc4 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -2397,7 +2397,7 @@ FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc,
            "found an unexpected usual deallocation function");
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+  if (getLangOpts().CUDA)
     EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
 
   assert(Matches.size() == 1 &&
@@ -2431,7 +2431,7 @@ bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
       Matches.push_back(F.getPair());
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+  if (getLangOpts().CUDA)
     EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
 
   // There's exactly one suitable operator;  pick it.
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index a0e189dbfc5..c7ee0c7b569 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1125,7 +1125,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
       return true;
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
+  if (getLangOpts().CUDA) {
     CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
                        OldTarget = IdentifyCUDATarget(Old);
     if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
@@ -8639,8 +8639,7 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
        Cand2.Function->hasAttr<EnableIfAttr>()))
     return hasBetterEnableIfAttrs(S, Cand1.Function, Cand2.Function);
 
-  if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
-      Cand1.Function && Cand2.Function) {
+  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
     FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
     return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
            S.IdentifyCUDAPreference(Caller, Cand2.Function);
@@ -8745,7 +8744,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
   // only on their host/device attributes. Specifically, if one
   // candidate call is WrongSide and the other is SameSide, we ignore
   // the WrongSide candidate.
-  if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads) {
+  if (S.getLangOpts().CUDA) {
     const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
     bool ContainsSameSideCandidate =
         llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
@@ -10299,8 +10298,7 @@ public:
       }
     }
 
-    if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
-        Matches.size() > 1)
+    if (S.getLangOpts().CUDA && Matches.size() > 1)
       EliminateSuboptimalCudaMatches();
   }