summaryrefslogtreecommitdiffstats
path: root/polly/lib/CodeGen/PPCGCodeGeneration.cpp
diff options
context:
space:
mode:
authorSiddharth Bhat <siddu.druid@gmail.com>2017-08-31 13:03:37 +0000
committerSiddharth Bhat <siddu.druid@gmail.com>2017-08-31 13:03:37 +0000
commit56572c6a5e47eb8581662f436810e2472416e91f (patch)
tree0defd5a1f42823af74f391e6af6d6b1d679d6bf3 /polly/lib/CodeGen/PPCGCodeGeneration.cpp
parent80df64239593200f9d79312fd22975457f981b58 (diff)
downloadbcm5719-llvm-56572c6a5e47eb8581662f436810e2472416e91f.tar.gz
bcm5719-llvm-56572c6a5e47eb8581662f436810e2472416e91f.zip
[PPCGCodeGen] Convert intrinsics to libdevice functions whenever possible.
This is useful when we face certain intrinsics such as `llvm.exp.*` which cannot be lowered by the NVPTX backend while other intrinsics can. So, we would need to keep blacklists of intrinsics that cannot be handled by the NVPTX backend. It is much simpler to try and promote all intrinsics to libdevice versions. This patch makes the handling of functions and intrinsics uniform, and will always try to use a libdevice version if it exists. Differential Revision: https://reviews.llvm.org/D37056 llvm-svn: 312239
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp48
1 files changed, 41 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 21edbc7be1a..03625318c76 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1383,15 +1383,36 @@ isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) {
/// A list of functions that are available in NVIDIA's libdevice.
const std::set<std::string> CUDALibDeviceFunctions = {
- "exp", "expf", "expl", "cos", "cosf", "sqrt",
- "sqrtf", "copysign", "copysignf", "copysignl", "log", "logf"};
+ "exp", "expf", "expl", "cos", "cosf", "sqrt", "sqrtf",
+ "copysign", "copysignf", "copysignl", "log", "logf", "powi", "powif"};
+
+// A map from intrinsics to their corresponding libdevice functions.
+const std::map<std::string, std::string> IntrinsicToLibdeviceFunc = {
+ {"llvm.exp.f64", "exp"},
+ {"llvm.exp.f32", "expf"},
+ {"llvm.powi.f64", "powi"},
+ {"llvm.powi.f32", "powif"}};
/// Return the corresponding CUDA libdevice function name for @p F.
+/// Note that this function will try to convert intrinsics in the list
+/// IntrinsicToLibdeviceFunc into libdevice functions.
+/// This is because some intrinsics such as `exp`
+/// are not supported by the NVPTX backend.
+/// If this restriction of the backend is lifted, we should refactor our code
+/// so that we use intrinsics whenever possible.
///
/// Return "" if we are not compiling for CUDA.
std::string getCUDALibDeviceFuntion(Function *F) {
- if (CUDALibDeviceFunctions.count(F->getName()))
- return std::string("__nv_") + std::string(F->getName());
+ const std::string FnName = [&] {
+ auto It = IntrinsicToLibdeviceFunc.find(F->getName());
+ if (It != IntrinsicToLibdeviceFunc.end())
+ return It->second;
+
+ return std::string(F->getName());
+ }();
+
+ if (CUDALibDeviceFunctions.count(FnName))
+ return "__nv_" + FnName;
return "";
}
@@ -1409,7 +1430,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) {
return F->isIntrinsic() &&
(Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") ||
- Name.startswith("llvm.copysign") || Name.startswith("llvm.powi"));
+ Name.startswith("llvm.copysign"));
}
/// Do not take `Function` as a subtree value.
@@ -2362,9 +2383,22 @@ bool GPUNodeBuilder::requiresCUDALibDevice() {
if (!F.isDeclaration())
continue;
- std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion(&F);
+ const std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion(&F);
if (CUDALibDeviceFunc.length() != 0) {
- F.setName(CUDALibDeviceFunc);
+ // We need to handle the case where a module looks like this:
+ // @expf(..)
+ // @llvm.exp.f32(..)
+ // Both of these functions would be renamed to `__nv_expf`.
+ //
+ // So, we must first check for the existence of the libdevice function.
+ // If this exists, we replace our current function with it.
+ //
+ // If it does not exist, we rename the current function to the
+ // libdevice function name.
+ if (Function *Replacement = F.getParent()->getFunction(CUDALibDeviceFunc))
+ F.replaceAllUsesWith(Replacement);
+ else
+ F.setName(CUDALibDeviceFunc);
RequiresLibDevice = true;
}
}
OpenPOWER on IntegriCloud