diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGCall.cpp | 4 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 8 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains.cpp | 4 | ||||
-rw-r--r-- | clang/lib/Frontend/CompilerInvocation.cpp | 3 |
4 files changed, 19 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index cbd7422e6aa..04c0a116001 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1768,6 +1768,10 @@ void CodeGenModule::ConstructAttributeList( // __syncthreads(), and so can't have certain optimizations applied around // them). LLVM will remove this attribute where it safely can. FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Respect -fcuda-flush-denormals-to-zero. + if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 11f5bea64ca..f525246e6ef 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -472,6 +472,14 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } + if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) { + // Indicate whether __nvvm_reflect should be configured to flush denormal + // floating point values to 0. (This corresponds to its "__CUDA_FTZ" + // property.) + getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", + LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); + } + if (uint32_t PLevel = Context.getLangOpts().PICLevel) { llvm::PICLevel::Level PL = llvm::PICLevel::Default; switch (PLevel) { diff --git a/clang/lib/Driver/ToolChains.cpp b/clang/lib/Driver/ToolChains.cpp index 11ded7cd5eb..902338b2884 100644 --- a/clang/lib/Driver/ToolChains.cpp +++ b/clang/lib/Driver/ToolChains.cpp @@ -4208,6 +4208,10 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, Linux::addClangTargetOptions(DriverArgs, CC1Args); CC1Args.push_back("-fcuda-is-device"); + if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, + options::OPT_fno_cuda_flush_denormals_to_zero, false)) + CC1Args.push_back("-fcuda-flush-denormals-to-zero"); + if (DriverArgs.hasArg(options::OPT_nocudalib)) return; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 4d97b76d753..ba56665bc84 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1571,6 +1571,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) Opts.CUDAHostDeviceConstexpr = 0; + if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero)) + Opts.CUDADeviceFlushDenormalsToZero = 1; + if (Opts.ObjC1) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); |