diff options
author | Tobias Grosser <tobias@grosser.es> | 2017-08-21 09:52:08 +0000 |
---|---|---|
committer | Tobias Grosser <tobias@grosser.es> | 2017-08-21 09:52:08 +0000 |
commit | b09bd74da8c1d5741d573c2bf2cb4e2de45d7d17 (patch) | |
tree | 9bcede94cb217ee8fff253fa5522ed804d1f874a | |
parent | 03c2208d5f0a20681bb3b9adb75bb634fa8b34cc (diff) | |
download | bcm5719-llvm-b09bd74da8c1d5741d573c2bf2cb4e2de45d7d17.tar.gz bcm5719-llvm-b09bd74da8c1d5741d573c2bf2cb4e2de45d7d17.zip |
[GPGPU] Add llvm.powi to the libdevice supported functions
These intrinsics are used in COSMO.
llvm-svn: 311324
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 2 | ||||
-rw-r--r-- | polly/test/GPGPU/intrinsic-copied-into-kernel.ll | 8 |
2 files changed, 7 insertions, 3 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 6e7ef9cc176..aa4886dc070 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -1406,7 +1406,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) {

   return F->isIntrinsic() &&
          (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") ||
-          Name.startswith("llvm.copysign"));
+          Name.startswith("llvm.copysign") || Name.startswith("llvm.powi"));
 }

 /// Do not take `Function` as a subtree value.
diff --git a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll b/polly/test/GPGPU/intrinsic-copied-into-kernel.ll
index 49f4b2b39c2..13df82f28b4 100644
--- a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll
+++ b/polly/test/GPGPU/intrinsic-copied-into-kernel.ll
@@ -14,6 +14,7 @@
 ; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_)
 ; KERNEL-IR: declare float @llvm.sqrt.f32(float)
 ; KERNEL-IR: declare float @llvm.fabs.f32(float)
+; KERNEL-IR: declare float @llvm.powi.f32(float, i32)

 ; Check that kernel launch is generated in host IR.
 ; the declare would not be generated unless a call to a kernel exists.
@@ -26,7 +27,8 @@
 ; float tmp1 = sqrt(tmp1);
 ; float tmp2 = fabs(tmp2);
 ; float tmp3 = copysignf(tmp1, tmp2);
-; B[i] = tmp3;
+; float tmp4 = powi(tmp3, 2);
+; B[i] = tmp4;
 ; }
 ; }

@@ -51,8 +53,9 @@ for.body: ; preds = %for.body.lr.ph, %fo
   %sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val)
   %fabs = tail call float @llvm.fabs.f32(float %sqrt);
   %copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs);
+  %powi = tail call float @llvm.powi.f32(float %copysign, i32 2);
   %B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  store float %copysign, float* %B.arr.i, align 4
+  store float %powi, float* %B.arr.i, align 4

   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %wide.trip.count = zext i32 %N to i64
@@ -70,6 +73,7 @@ for.end: ; preds = %for.cond.for.end_cr
 declare float @llvm.sqrt.f32(float) #0
 declare float @llvm.fabs.f32(float) #0
 declare float @llvm.copysign.f32(float, float) #0
+declare float @llvm.powi.f32(float, i32) #0

 attributes #0 = { nounwind readnone }