diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-08-06 21:51:52 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-08-06 21:51:52 +0000 |
| commit | ce6d61fba83d926c8dfacedae4d25c44e28ab893 (patch) | |
| tree | 557830c22f581d5c92fe2ba659fa506bb70e74c5 /llvm/test | |
| parent | ddbabc6b7c3fc401d7b59f4c905c5964cb3c5643 (diff) | |
| download | bcm5719-llvm-ce6d61fba83d926c8dfacedae4d25c44e28ab893.tar.gz bcm5719-llvm-ce6d61fba83d926c8dfacedae4d25c44e28ab893.zip | |
AMDGPU: Conversions always produce canonical results
Not sure why this was checking for denormals for f16.
My interpretation of the IEEE standard is that conversions
should produce a canonical result, and the ISA manual
says denormals are created when appropriate.
llvm-svn: 339064
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll | 36 |
1 file changed, 35 insertions, 1 deletion
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 4005c4d94cc..e3748e650b3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -215,6 +215,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half ad
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
+; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
+  %load = load half, half addrspace(1)* %gep, align 2
+  %v = fpext half %load to float
+  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
+  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
+  store float %canonicalized, float addrspace(1)* %gep2, align 4
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
 ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
 ; GCN-NOT: v_mul
@@ -233,8 +249,9 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double a
 
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
-; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -246,6 +263,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float ad
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
+; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
+  %load = load float, float addrspace(1)* %gep, align 4
+  %v = fptrunc float %load to half
+  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
+  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
+  store half %canonicalized, half addrspace(1)* %gep2, align 2
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
 ; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
 ; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
@@ -738,3 +771,4 @@ declare double @llvm.maxnum.f64(double, double) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
+attributes #2 = { "target-features"="-fp64-fp16-denormals" }
```

