diff options
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 48 |
1 files changed, 41 insertions, 7 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 21edbc7be1a..03625318c76 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1383,15 +1383,36 @@ isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) { /// A list of functions that are available in NVIDIA's libdevice. const std::set<std::string> CUDALibDeviceFunctions = { - "exp", "expf", "expl", "cos", "cosf", "sqrt", - "sqrtf", "copysign", "copysignf", "copysignl", "log", "logf"}; + "exp", "expf", "expl", "cos", "cosf", "sqrt", "sqrtf", + "copysign", "copysignf", "copysignl", "log", "logf", "powi", "powif"}; + +// A map from intrinsics to their corresponding libdevice functions. +const std::map<std::string, std::string> IntrinsicToLibdeviceFunc = { + {"llvm.exp.f64", "exp"}, + {"llvm.exp.f32", "expf"}, + {"llvm.powi.f64", "powi"}, + {"llvm.powi.f32", "powif"}}; /// Return the corresponding CUDA libdevice function name for @p F. +/// Note that this function will try to convert intrinsics in the list +/// IntrinsicToLibdeviceFunc into libdevice functions. +/// This is because some intrinsics such as `exp` +/// are not supported by the NVPTX backend. +/// If this restriction of the backend is lifted, we should refactor our code +/// so that we use intrinsics whenever possible. /// /// Return "" if we are not compiling for CUDA. 
std::string getCUDALibDeviceFuntion(Function *F) { - if (CUDALibDeviceFunctions.count(F->getName())) - return std::string("__nv_") + std::string(F->getName()); + const std::string FnName = [&] { + auto It = IntrinsicToLibdeviceFunc.find(F->getName()); + if (It != IntrinsicToLibdeviceFunc.end()) + return It->second; + + return std::string(F->getName()); + }(); + + if (CUDALibDeviceFunctions.count(FnName)) + return "__nv_" + FnName; return ""; } @@ -1409,7 +1430,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) { return F->isIntrinsic() && (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || - Name.startswith("llvm.copysign") || Name.startswith("llvm.powi")); + Name.startswith("llvm.copysign")); } /// Do not take `Function` as a subtree value. @@ -2362,9 +2383,22 @@ bool GPUNodeBuilder::requiresCUDALibDevice() { if (!F.isDeclaration()) continue; - std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion(&F); + const std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion(&F); if (CUDALibDeviceFunc.length() != 0) { - F.setName(CUDALibDeviceFunc); + // We need to handle the case where a module looks like this: + // @expf(..) + // @llvm.exp.f32(..) + // Both of these functions would be renamed to `__nv_expf`. + // + // So, we must first check for the existence of the libdevice function. + // If this exists, we replace our current function with it. + // + // If it does not exist, we rename the current function to the + // libdevice function name. + if (Function *Replacement = F.getParent()->getFunction(CUDALibDeviceFunc)) + F.replaceAllUsesWith(Replacement); + else + F.setName(CUDALibDeviceFunc); RequiresLibDevice = true; } } |