diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-09-09 01:12:27 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-09-09 01:12:27 +0000 |
commit | acd68b58ae25e92e02a2b90b994bfa887d715855 (patch) | |
tree | 8e5d069b0096193bb5ec49ce8786db25bbfc0d20 /llvm/test/CodeGen/AMDGPU/half.ll | |
parent | d3233c1ed78fca9640cbfa145c19fe073c63eab3 (diff) | |
download | bcm5719-llvm-acd68b58ae25e92e02a2b90b994bfa887d715855.tar.gz bcm5719-llvm-acd68b58ae25e92e02a2b90b994bfa887d715855.zip |
SelectionDAG: Support Expand of f16 extloads
Currently this hits an assert that extload should
always be supported, which assumes integer extloads.
This moves a hack out of SI's argument lowering and
is covered by existing tests.
llvm-svn: 247113
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/half.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/half.ll | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll index bf8f11860b5..cd0b1b4e68d 100644 --- a/llvm/test/CodeGen/AMDGPU/half.ll +++ b/llvm/test/CodeGen/AMDGPU/half.ll @@ -112,12 +112,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal } ; GCN-LABEL: {{^}}extload_f16_to_f64_arg: +; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} +; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}} +; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]] +; GCN: buffer_store_dwordx2 [[RESULT]] define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 { %ext = fpext half %arg to double store double %ext, double addrspace(1)* %out ret void } + ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 { %ext = fpext <2 x half> %arg to <2 x double> store <2 x double> %ext, <2 x double> addrspace(1)* %out @@ -125,6 +137,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha } ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 { %ext = fpext <3 x half> %arg to <3 x double> store <3 x double> %ext, <3 x double> addrspace(1)* %out @@ -132,6 +154,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha } ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 { %ext = fpext <4 x half> %arg to <4 x double> store <4 x double> %ext, <4 x double> addrspace(1)* %out @@ -139,6 +174,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN: s_endpgm define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x double> store <8 x double> %ext, <8 x double> addrspace(1)* %out @@ -194,6 +260,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace( } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x float> @@ -246,6 +318,15 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]] +; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]] +; GCN-DAG: buffer_store_dwordx2 [[CVT2]] +; GCN-DAG: buffer_store_dwordx2 [[CVT3]] +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x double> |