diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:05:41 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 03:05:41 +0000 |
| commit | 4052a576c05a9d11b7f7ac354db901275101003b (patch) | |
| tree | e08f863ad685f9e13aa5d88520e15e85c20ab48a /llvm/test/CodeGen/AMDGPU | |
| parent | ce84130f8562c8c990362502f03d04187a0be581 (diff) | |
| download | bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.tar.gz bcm5719-llvm-4052a576c05a9d11b7f7ac354db901275101003b.zip | |
AMDGPU: Custom lower f16 fdiv
llvm-svn: 290301
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fdiv.f16.ll | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
index bad04326193..da791f7e665 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -4,27 +4,39 @@
 ; Make sure fdiv is promoted to f32.

 ; GCN-LABEL: {{^}}fdiv_f16
-; GCN: v_cvt_f32_f16
-; GCN: v_cvt_f32_f16
-; GCN: v_div_scale_f32
-; GCN-DAG: v_div_scale_f32
-; GCN-DAG: v_rcp_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_mul_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_div_fmas_f32
-; GCN: v_div_fixup_f32
-; GCN: v_cvt_f16_f32
+; SI: v_cvt_f32_f16
+; SI: v_cvt_f32_f16
+; SI: v_div_scale_f32
+; SI-DAG: v_div_scale_f32
+; SI-DAG: v_rcp_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_mul_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_div_fmas_f32
+; SI: v_div_fixup_f32
+; SI: v_cvt_f16_f32
+
+; VI: buffer_load_ushort [[LHS:v[0-9]+]]
+; VI: buffer_load_ushort [[RHS:v[0-9]+]]
+
+; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
+; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]
+
+; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
+; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
+; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
+; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
+; VI: buffer_store_short [[RESULT]]
 define void @fdiv_f16(
 half addrspace(1)* %r,
 half addrspace(1)* %a,
 half addrspace(1)* %b) {
 entry:
- %a.val = load half, half addrspace(1)* %a
- %b.val = load half, half addrspace(1)* %b
+ %a.val = load volatile half, half addrspace(1)* %a
+ %b.val = load volatile half, half addrspace(1)* %b
 %r.val = fdiv half %a.val, %b.val
 store half %r.val, half addrspace(1)* %r
 ret void

