summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGCall.cpp4
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp8
-rw-r--r--clang/lib/Driver/ToolChains.cpp4
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp3
4 files changed, 19 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index cbd7422e6aa..04c0a116001 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1768,6 +1768,10 @@ void CodeGenModule::ConstructAttributeList(
// __syncthreads(), and so can't have certain optimizations applied around
// them). LLVM will remove this attribute where it safely can.
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+ // Respect -fcuda-flush-denormals-to-zero.
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 11f5bea64ca..f525246e6ef 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -472,6 +472,14 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
}
+ if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) {
+ // Indicate whether __nvvm_reflect should be configured to flush denormal
+ // floating point values to 0. (This corresponds to its "__CUDA_FTZ"
+ // property.)
+ getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
+ LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
+ }
+
if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
llvm::PICLevel::Level PL = llvm::PICLevel::Default;
switch (PLevel) {
diff --git a/clang/lib/Driver/ToolChains.cpp b/clang/lib/Driver/ToolChains.cpp
index 11ded7cd5eb..902338b2884 100644
--- a/clang/lib/Driver/ToolChains.cpp
+++ b/clang/lib/Driver/ToolChains.cpp
@@ -4208,6 +4208,10 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
Linux::addClangTargetOptions(DriverArgs, CC1Args);
CC1Args.push_back("-fcuda-is-device");
+ if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+ options::OPT_fno_cuda_flush_denormals_to_zero, false))
+ CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+
if (DriverArgs.hasArg(options::OPT_nocudalib))
return;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4d97b76d753..ba56665bc84 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1571,6 +1571,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
Opts.CUDAHostDeviceConstexpr = 0;
+ if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero))
+ Opts.CUDADeviceFlushDenormalsToZero = 1;
+
if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();
OpenPOWER on IntegriCloud