diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-02 02:27:04 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-02 02:27:04 +0000 |
commit | 9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e (patch) | |
tree | a767760f7b8e6c3d41fc291938b66de6af5f4ac5 /llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | |
parent | 9dc3b5ff8954bee45fd8ed2e7de43de34f204944 (diff) | |
download | bcm5719-llvm-9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e.tar.gz bcm5719-llvm-9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e.zip |
AMDGPU: Use source modifiers with f16->f32 conversions
The operand types were defined to fit the fp16_to_fp node, which
has the half as an integer type. v_cvt_f32_f16 does support
source modifiers, so change this to have an FP type and modifiers.
For targets without legal f16, this requires recognizing the
bit operations and trying to produce them.
llvm-svn: 293857
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 53 |
1 files changed, 20 insertions, 33 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index d7d21311c1b..240fd071000 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -3,8 +3,8 @@ ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 -; CI: v_cvt_f32_f16_e32 -; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}| +; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}| +; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}} ; VI-NOT: and ; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}| @@ -17,14 +17,15 @@ define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) { } ; GCN-LABEL: {{^}}fneg_fabs_fmul_f16: -; CI: v_cvt_f32_f16_e32 -; CI: v_cvt_f32_f16_e32 -; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}| +; CI-DAG: v_cvt_f32_f16_e32 +; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}| +; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}} ; CI: v_cvt_f16_f32_e32 ; VI-NOT: and -; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}| -; VI-NOT: and +; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}| +; VI-NOT: [[MUL]] +; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) { %fabs = call half @llvm.fabs.f16(half %x) %fsub = fsub half -0.000000e+00, %fabs @@ -49,10 +50,7 @@ define void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) { ; FIXME: Should use or ; GCN-LABEL: {{^}}fneg_fabs_f16: -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) { %fabs = call half @llvm.fabs.f16(half %in) %fsub = fsub half -0.000000e+00, %fabs @@ -61,10 +59,7 @@ define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) { } ; GCN-LABEL: {{^}}v_fneg_fabs_f16: -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) { %val = load half, half addrspace(1)* %in, align 2 %fabs = call half @llvm.fabs.f16(half %val) @@ -75,13 +70,10 @@ define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) { ; FIXME: single bit op ; GCN-LABEL: {{^}}fneg_fabs_v2f16: -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: flat_store_dword +; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: store_dword define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs @@ -90,17 +82,12 @@ define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { } ; GCN-LABEL: {{^}}fneg_fabs_v4f16: -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: flat_store_dwordx2 +; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: store_dwordx2 define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) { %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in) %fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs |