diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-10-13 20:45:49 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-10-13 20:45:49 +0000 |
| commit | 550c66d10f42e28325ccc738760f91fc51630d94 (patch) | |
| tree | 60c9cf34c9fdb795f9ebac41609af8faf765ad06 /llvm/test | |
| parent | 649c585710df40c22765fcc1822cc2d0f0ebb4c2 (diff) | |
| download | bcm5719-llvm-550c66d10f42e28325ccc738760f91fc51630d94.tar.gz bcm5719-llvm-550c66d10f42e28325ccc738760f91fc51630d94.zip | |
AMDGPU: Look for src mods before fp_extend
When selecting modifiers for mad_mix instructions,
look at fneg/fabs that occur before the conversion.
llvm-svn: 315748
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fpext-free.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/mad-mix.ll | 100 |
2 files changed, 102 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll index caa241a0ee7..0a504b3e03e 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll @@ -254,11 +254,9 @@ entry: ; fold (fsub (fpext (fneg (fmul, x, y))), z) ; -> (fneg (fma (fpext x), (fpext y), z)) -; FIXME: Should be able to fold fneg ; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32: ; GCN: s_waitcnt -; GFX9-F32FLUSH-NEXT: v_xor_b32_e32 v1, 0x8000, v1 -; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}} ; GFX9-F32FLUSH-NEXT: s_setpc_b64 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1 @@ -277,11 +275,9 @@ entry: ; fold (fsub (fneg (fpext (fmul, x, y))), z) ; -> (fneg (fma (fpext x)), (fpext y), z) -; FIXME: Should be able to fold fneg ; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32: ; GCN: s_waitcnt -; GFX9-F32FLUSH-NEXT: v_xor_b32_e32 v1, 0x8000, v1 -; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}} ; GFX9-F32FLUSH-NEXT: s_setpc_b64 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1 diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index dfecdb76751..abffc3af2aa 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -398,6 +398,106 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src ret float %result } +; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 ; encoding +; GFX9-NEXT: s_setpc_b64 + +; CIVI: v_mad_f32 +define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 + %src0.neg = fsub half -0.0, %src0 + 
%src0.ext = fpext half %src0.neg to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float +; %src0.ext.neg = fsub float -0.0, %src0.ext + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; Make sure we don't fold pre-cvt fneg if we already have a fabs +; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; GFX9: s_waitcnt +define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 + %src0.neg = fsub half -0.0, %src0 + %src0.ext = fpext half %src0.neg to float + %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) + ret float %result +} + +; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 + %src0.abs = call half @llvm.fabs.f16(half %src0) + %src0.ext = fpext half %src0.abs to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x 
half> + %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc + %src0 = extractelement <2 x half> %fneg, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; FIXME: Should be able to fold +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 +; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) + %src0 = extractelement <2 x half> %fabs, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; FIXME: Should be able to fold +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) + %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs + %src0 = extractelement <2 x half> %fneg.fabs, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +declare half 
@llvm.fabs.f16(half) #2 +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 declare float @llvm.fabs.f32(float) #2 declare float @llvm.minnum.f32(float, float) #2 declare float @llvm.maxnum.f32(float, float) #2 |

