diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-06-26 20:04:19 +0000 | 
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-06-26 20:04:19 +0000 | 
| commit | dacda79ee6a4a0b64485d63f51e20869f81f3114 (patch) | |
| tree | fc2f1c41e87d52e833a56aaf45161fafd4d42620 /llvm/test/CodeGen/AMDGPU | |
| parent | 2a2945a3c5f6f40c3edc5c24e50ce45eed3544f6 (diff) | |
| download | bcm5719-llvm-dacda79ee6a4a0b64485d63f51e20869f81f3114.tar.gz bcm5719-llvm-dacda79ee6a4a0b64485d63f51e20869f81f3114.zip  | |
[AMDGPU] Add llvm.amdgcn.fmad.ftz intrinsic
This intrinsic selects v_mad_f32 regardless of fp32 denorm support.
Differential Revision: https://reviews.llvm.org/D48573
llvm-svn: 335654
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll | 114 | 
1 files changed, 114 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
new file mode 100644
index 00000000000..c9c9ef167c4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
@@ -0,0 +1,114 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+declare float @llvm.amdgcn.fmad.ftz(float %a, float %b, float %c)
+
+; GCN-LABEL: {{^}}mad_f32:
+; GCN:  v_ma{{[dc]}}_f32
+define amdgpu_kernel void @mad_f32(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b,
+    float addrspace(1)* %c) {
+  %a.val = load float, float addrspace(1)* %a
+  %b.val = load float, float addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %b.val, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_imm_a:
+; GCN: v_mov_b32_e32 [[KA:v[0-9]+]], 0x41000000
+; GCN:  v_ma{{[dc]}}_f32 {{v[0-9]+}}, [[KA]],
+define amdgpu_kernel void @mad_f32_imm_a(
+    float addrspace(1)* %r,
+    float addrspace(1)* %b,
+    float addrspace(1)* %c) {
+  %b.val = load float, float addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float 8.0, float %b.val, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_imm_b:
+; GCN: v_mov_b32_e32 [[KB:v[0-9]+]], 0x41000000
+; GCN:  v_ma{{[dc]}}_f32 {{v[0-9]+}}, {{[vs][0-9]+}}, [[KB]],
+define amdgpu_kernel void @mad_f32_imm_b(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %c) {
+  %a.val = load float, float addrspace(1)* %a
+  %c.val = load float, float addrspace(1)* %c
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float 8.0, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_imm_c:
+; GCN: v_mov_b32_e32 [[KC:v[0-9]+]], 0x41000000
+; GCN:  v_ma{{[dc]}}_f32 {{v[0-9]+}}, {{[vs][0-9]+}}, {{v[0-9]+}}, [[KC]]{{$}}
+define amdgpu_kernel void @mad_f32_imm_c(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b) {
+  %a.val = load float, float addrspace(1)* %a
+  %b.val = load float, float addrspace(1)* %b
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %b.val, float 8.0)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_neg_b:
+; GCN:  v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @mad_f32_neg_b(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b,
+    float addrspace(1)* %c) {
+  %a.val = load float, float addrspace(1)* %a
+  %b.val = load float, float addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %neg.b = fsub float -0.0, %b.val
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %neg.b, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_abs_b:
+; GCN:  v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
+define amdgpu_kernel void @mad_f32_abs_b(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b,
+    float addrspace(1)* %c) {
+  %a.val = load float, float addrspace(1)* %a
+  %b.val = load float, float addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %abs.b = call float @llvm.fabs.f32(float %b.val)
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %abs.b, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}mad_f32_neg_abs_b:
+; GCN:  v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
+define amdgpu_kernel void @mad_f32_neg_abs_b(
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b,
+    float addrspace(1)* %c) {
+  %a.val = load float, float addrspace(1)* %a
+  %b.val = load float, float addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %abs.b = call float @llvm.fabs.f32(float %b.val)
+  %neg.abs.b = fsub float -0.0, %abs.b
+  %r.val = call float @llvm.amdgcn.fmad.ftz(float %a.val, float %neg.abs.b, float %c.val)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+declare float @llvm.fabs.f32(float)
```

