diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-30 13:39:33 -0400 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-07 15:10:07 -0500 |
| commit | 68e70fb098a27d08e6dd039995c2acf14b894abc (patch) | |
| tree | efda1d63fe00ed82e362966c18c7a47a1776f19c /llvm/test/CodeGen/AMDGPU | |
| parent | 640d0ba8760051afc002c672121c6989517fc94e (diff) | |
| download | bcm5719-llvm-68e70fb098a27d08e6dd039995c2acf14b894abc.tar.gz bcm5719-llvm-68e70fb098a27d08e6dd039995c2acf14b894abc.zip | |
AMDGPU: Fix not using v_cvt_f16_[iu]16
We weren't treating i16->f16 casts as legal on targets with these
instructions, and were always using a pair of casts through i32.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sitofp.f16.ll | 14 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/uitofp.f16.ll | 13 |
4 files changed, 24 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll index 494613f45f7..97d5bcce619 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -365,15 +365,15 @@ entry: } ; GCN-LABEL: {{^}}sitofp_v2i16_to_v2f16: -; NOSDWA-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 -; NOSDWA-DAG: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; NOSDWA-DAG: v_cvt_f32_i32_e32 v{{[0-9]+}}, v{{[0-9]+}} -; NOSDWA-DAG: v_cvt_f32_i32_e32 v{{[0-9]+}}, v{{[0-9]+}} -; NOSDWA-NOT: v_cvt_f32_i32_sdwa +; NOSDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} +; NOSDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA-NOT: v_cvt_f16_i16_sdwa -; SDWA-DAG: v_cvt_f32_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; SDWA-DAG: v_cvt_f32_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDWA-DAG: v_cvt_f16_i16_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SDWA-DAG: v_cvt_f16_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}} dst_sel:{{(WORD_1|DWORD)?}} dst_unused:UNUSED_PAD src0_sel:WORD_1 +; FIXME: Should be able to avoid or define amdgpu_kernel void @sitofp_v2i16_to_v2f16( <2 x half> addrspace(1)* %r, <2 x i16> addrspace(1)* %a) { diff --git a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll index 231d3a6d1b4..3a3e0def36c 100644 --- a/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/sitofp.f16.ll @@ -3,8 +3,12 @@ ; GCN-LABEL: {{^}}sitofp_i16_to_f16 ; GCN: buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]] -; GCN: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] -; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] + +; SI: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] +; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] + +; VI: v_cvt_f16_i16_e32 v[[R_F16:[0-9]+]], v[[A_I16]] + ; GCN: buffer_store_short v[[R_F16]] ; GCN: 
s_endpgm define amdgpu_kernel void @sitofp_i16_to_f16( @@ -45,10 +49,8 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 ; SI: v_or_b32_e32 -; VI-DAG: v_cvt_f32_i32_sdwa -; VI-DAG: v_cvt_f32_i32_sdwa -; VI-DAG: v_cvt_f16_f32_e32 -; VI-DAG: v_cvt_f16_f32_sdwa +; VI-DAG: v_cvt_f16_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; VI-DAG: v_cvt_f16_i16_e32 ; VI: v_or_b32_e32 ; GCN: buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index 249945ccbec..75d6eb57cb0 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -104,7 +104,7 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, ; SI: v_cvt_f64_u32_e32 v{{\[[0-9]+:[0-9]+\]}}, [[ZEXT]] ; VI: s_and_b32 [[ZEXT:s[0-9]+]], [[VAL]], 0xff{{$}} -; VI: v_cvt_f64_i32_e32 v{{\[[0-9]+:[0-9]+\]}}, [[ZEXT]] +; VI: v_cvt_f64_u32_e32 v{{\[[0-9]+:[0-9]+\]}}, [[ZEXT]] define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) { %fp = uitofp i8 %in to double store double %fp, double addrspace(1)* %out @@ -118,7 +118,7 @@ define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 ; VI: v_mov_b32_e32 v{{[0-9]+}} ; VI: v_and_b32_sdwa -; VI: v_cvt_f64_i32_e32 v{{\[[0-9]+:[0-9]+\]}}, +; VI: v_cvt_f64_u32_e32 v{{\[[0-9]+:[0-9]+\]}}, define double @v_uint_to_fp_i8_to_f64(i8 %in) { %fp = uitofp i8 %in to double ret double %fp diff --git a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll index 9cedce70d24..1a42618d5a7 100644 --- a/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/uitofp.f16.ll @@ -4,8 +4,10 @@ ; GCN-LABEL: {{^}}uitofp_i16_to_f16 ; GCN: buffer_load_ushort v[[A_I16:[0-9]+]] ; SI: v_cvt_f32_u32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] -; VI: v_cvt_f32_i32_e32 v[[A_F32:[0-9]+]], v[[A_I16]] -; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] +; SI: 
v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] + +; VI: v_cvt_f16_u16_e32 v[[R_F16:[0-9]+]], v[[A_I16]] + ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @uitofp_i16_to_f16( @@ -46,10 +48,9 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 ; SI: v_or_b32_e32 -; VI-DAG: v_cvt_f16_f32_e32 -; VI-DAG: v_cvt_f32_i32_sdwa -; VI-DAG: v_cvt_f32_i32_sdwa -; VI-DAG: v_cvt_f16_f32_sdwa + +; VI-DAG: v_cvt_f16_u16_e32 +; VI-DAG: v_cvt_f16_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI: v_or_b32_e32 ; GCN: buffer_store_dword |

