diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-09-17 13:24:30 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-09-17 13:24:30 +0000 |
| commit | 80ea6dd1d551c4a5a3d3d91b275c3b5242c12e7e (patch) | |
| tree | bb42bf082d5097cbe1595a94ee2bc4100a1ea1f2 | |
| parent | 87242f1052c01653b13f5b9511b9bd3fc6a7ba75 (diff) | |
| download | bcm5719-llvm-80ea6dd1d551c4a5a3d3d91b275c3b5242c12e7e.tar.gz bcm5719-llvm-80ea6dd1d551c4a5a3d3d91b275c3b5242c12e7e.zip | |
Fix vectorization of canonicalize
llvm-svn: 342390
| -rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Analysis/VectorUtils.cpp | 1 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll | 17 |
3 files changed, 21 insertions, 0 deletions
```diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f76a2426377..0cb1ab62785 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1036,6 +1036,9 @@ public:
     case Intrinsic::fabs:
       ISDs.push_back(ISD::FABS);
       break;
+    case Intrinsic::canonicalize:
+      ISDs.push_back(ISD::FCANONICALIZE);
+      break;
     case Intrinsic::minnum:
       ISDs.push_back(ISD::FMINNUM);
       if (FMF.noNaNs())
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index bbdea4de4e2..2f4f72aaa6c 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -70,6 +70,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
   case Intrinsic::powi:
+  case Intrinsic::canonicalize:
     return true;
   default:
     return false;
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
index 14e2cde0e29..55905a4c444 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
@@ -179,8 +179,25 @@ define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, ha
   ret void
 }
 
+; GCN-LABEL: @canonicalize_v2f16
+; GFX9: load <2 x half>
+; GFX9: call <2 x half> @llvm.canonicalize.v2f16(
+; GFX9: store <2 x half>
+define amdgpu_kernel void @canonicalize_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
+  %i0 = load half, half addrspace(3)* %a, align 2
+  %canonicalize0 = call half @llvm.canonicalize.f16(half %i0)
+  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+  %canonicalize1 = call half @llvm.canonicalize.f16(half %i3)
+  store half %canonicalize0, half addrspace(3)* %c, align 2
+  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+  store half %canonicalize1, half addrspace(3)* %arrayidx5, align 2
+  ret void
+}
+
 declare half @llvm.fabs.f16(half) #1
 declare half @llvm.fma.f16(half, half, half) #1
+declare half @llvm.canonicalize.f16(half) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
```

