diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-02 02:27:04 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-02 02:27:04 +0000 |
| commit | 9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e (patch) | |
| tree | a767760f7b8e6c3d41fc291938b66de6af5f4ac5 /llvm/test | |
| parent | 9dc3b5ff8954bee45fd8ed2e7de43de34f204944 (diff) | |
| download | bcm5719-llvm-9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e.tar.gz bcm5719-llvm-9dba9bd4cfd4c23921d7dd65ade04d4b8dd9ac0e.zip | |
AMDGPU: Use source modifiers with f16->f32 conversions
The operand types were defined to fit the fp16_to_fp node, which
has the half as an integer type. v_cvt_f32_f16 does support
source modifiers, so change this to have an FP type and modifiers.
For targets without legal f16, this requires recognizing the integer bit
operations that implement fneg/fabs and trying to fold them into source modifiers.
llvm-svn: 293857
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fabs.f16.ll | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fcmp.f16.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 53 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg.f16.ll | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fpext.f16.ll | 199 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 61 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/v_mac_f16.ll | 124 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/gfx8_asm_all.s | 6 |
8 files changed, 379 insertions, 108 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll index c64aa6228c7..77c941356b9 100644 --- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll @@ -19,8 +19,7 @@ define void @fabs_free_f16(half addrspace(1)* %out, i16 %in) { ; GCN-LABEL: {{^}}fabs_f16: ; CI: flat_load_ushort [[VAL:v[0-9]+]], -; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]] -; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]| +; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]] ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define void @fabs_f16(half addrspace(1)* %out, half %in) { %fabs = call half @llvm.fabs.f16(half %in) @@ -30,10 +29,10 @@ define void @fabs_f16(half addrspace(1)* %out, half %in) { ; FIXME: Should be able to use single and ; GCN-LABEL: {{^}}fabs_v2f16: -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| + +; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI: flat_load_ushort [[LO:v[0-9]+]] ; VI: flat_load_ushort [[HI:v[0-9]+]] @@ -51,10 +50,11 @@ define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { } ; GCN-LABEL: {{^}}fabs_v4f16: -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}| +; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} +; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}} ; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} @@ -72,9 +72,10 @@ define void @fabs_v4f16(<4 
x half> addrspace(1)* %out, <4 x half> %in) { ; GCN-LABEL: {{^}}fabs_fold_f16: ; GCN: flat_load_ushort [[IN0:v[0-9]+]] ; GCN: flat_load_ushort [[IN1:v[0-9]+]] + ; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]] -; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]] -; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]] +; CI-DAG: v_cvt_f32_f16_e64 [[ABS_CVT1:v[0-9]+]], |[[IN1]]| +; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[CVT0]], [[ABS_CVT1]] ; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]] ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]] diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll index a62726f7f06..8a01ea2fe43 100644 --- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll @@ -28,10 +28,10 @@ entry: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] +; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]| +; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]| -; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]| +; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]] ; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]| ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index d7d21311c1b..240fd071000 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -3,8 +3,8 @@ ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 -; CI: v_cvt_f32_f16_e32 -; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}| +; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}| +; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}} ; VI-NOT: and ; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}| @@ -17,14 +17,15 @@ define 
void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) { } ; GCN-LABEL: {{^}}fneg_fabs_fmul_f16: -; CI: v_cvt_f32_f16_e32 -; CI: v_cvt_f32_f16_e32 -; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}| +; CI-DAG: v_cvt_f32_f16_e32 +; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}| +; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}} ; CI: v_cvt_f16_f32_e32 ; VI-NOT: and -; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}| -; VI-NOT: and +; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}| +; VI-NOT: [[MUL]] +; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) { %fabs = call half @llvm.fabs.f16(half %x) %fsub = fsub half -0.000000e+00, %fabs @@ -49,10 +50,7 @@ define void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) { ; FIXME: Should use or ; GCN-LABEL: {{^}}fneg_fabs_f16: -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) { %fabs = call half @llvm.fabs.f16(half %in) %fsub = fsub half -0.000000e+00, %fabs @@ -61,10 +59,7 @@ define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) { } ; GCN-LABEL: {{^}}v_fneg_fabs_f16: -; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}} define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) { %val = load half, half addrspace(1)* %in, align 2 %fabs = call half @llvm.fabs.f16(half %val) @@ -75,13 +70,10 @@ define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) { ; FIXME: single bit op ; GCN-LABEL: {{^}}fneg_fabs_v2f16: -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; 
CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: flat_store_dword +; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: store_dword define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs @@ -90,17 +82,12 @@ define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { } ; GCN-LABEL: {{^}}fneg_fabs_v4f16: -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| -; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}| - -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: flat_store_dwordx2 +; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; GCN: store_dwordx2 define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) { %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in) %fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll index e3dfd9201a2..d545cc789d8 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll @@ -15,13 +15,9 @@ define void @s_fneg_f16(half addrspace(1)* %out, half %in) { ; FUNC-LABEL: {{^}}v_fneg_f16: ; GCN: 
flat_load_ushort [[VAL:v[0-9]+]], - -; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]] -; CI: v_cvt_f16_f32_e64 [[CVT1:v[0-9]+]], -[[CVT0]] -; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]] - -; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]] +; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]] ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]] +; SI: buffer_store_short [[XOR]] define void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) { %val = load half, half addrspace(1)* %in, align 2 %fneg = fsub half -0.000000e+00, %val @@ -45,8 +41,9 @@ define void @fneg_free_f16(half addrspace(1)* %out, i16 %in) { ; FUNC-LABEL: {{^}}v_fneg_fold_f16: ; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]] -; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[CVT0]] -; CI: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[CVT0]], [[CVT0]] +; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]] +; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]] +; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]] ; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]] ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]] diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll index c4f5d7cdfb5..433fdf1e075 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll @@ -68,3 +68,202 @@ entry: store <2 x double> %r.val, <2 x double> addrspace(1)* %r ret void } + +; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32: +; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}} +define void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) { +entry: + %a.trunc = trunc i32 %a to i16 + %a.val = bitcast i16 %a.trunc to half + %r.val = fpext half %a.val to float + store float %r.val, float addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fneg_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]] +define void @fneg_fpext_f16_to_f32( + float addrspace(1)* %r, + 
half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.neg = fsub half -0.0, %a.val + %r.val = fpext half %a.neg to float + store float %r.val, float addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fabs_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]| +define void @fabs_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %r.val = fpext half %a.fabs to float + store float %r.val, float addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fneg_fabs_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]| +define void @fneg_fabs_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %a.fneg.fabs = fsub half -0.0, %a.fabs + %r.val = fpext half %a.fneg.fabs to float + store float %r.val, float addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fneg_multi_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN-DAG: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[A]] + +; FIXME: Using the source modifier here only wastes code size +; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]] +; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] + +; GCN: store_dword [[CVT]] +; GCN: store_short [[XOR]] +define void @fneg_multi_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.neg = fsub half -0.0, %a.val + %r.val = fpext half %a.neg to float + store volatile float %r.val, float addrspace(1)* %r + store volatile half %a.neg, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}fneg_multi_foldable_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN-DAG: 
v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]] +; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]] +; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]] +; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]] + +; VI-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]] +; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], -[[A]], [[A]] + +; GCN: buffer_store_dword [[CVTA_NEG]] +; GCN: buffer_store_short [[MUL]] +define void @fneg_multi_foldable_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.neg = fsub half -0.0, %a.val + %r.val = fpext half %a.neg to float + %mul = fmul half %a.neg, %a.val + store volatile float %r.val, float addrspace(1)* %r + store volatile half %mul, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}fabs_multi_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]] + +; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]] +; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]| + +; GCN: store_dword [[CVT]] +; GCN: store_short [[XOR]] +define void @fabs_multi_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %r.val = fpext half %a.fabs to float + store volatile float %r.val, float addrspace(1)* %r + store volatile half %a.fabs, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}fabs_multi_foldable_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]] +; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], |[[CVTA]]|, [[CVTA]] +; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]] +; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[CVTA]] + +; VI-DAG: v_cvt_f32_f16_e64 [[ABS_A:v[0-9]+]], |[[A]]| +; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], |[[A]]|, [[A]] + +; GCN: buffer_store_dword [[ABS_A]] +; GCN: 
buffer_store_short [[MUL]] +define void @fabs_multi_foldable_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %r.val = fpext half %a.fabs to float + %mul = fmul half %a.fabs, %a.val + store volatile float %r.val, float addrspace(1)* %r + store volatile half %mul, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}fabs_fneg_multi_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]] + +; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]] +; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]| + +; GCN: buffer_store_dword [[CVT]] +; GCN: buffer_store_short [[OR]] +define void @fabs_fneg_multi_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %a.fneg.fabs = fsub half -0.0, %a.fabs + %r.val = fpext half %a.fneg.fabs to float + store volatile float %r.val, float addrspace(1)* %r + store volatile half %a.fneg.fabs, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}fabs_fneg_multi_foldable_use_fpext_f16_to_f32: +; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]] +; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]] +; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], -|[[CVTA]]|, [[CVTA]] +; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]] +; SI: v_or_b32_e32 [[FABS_FNEG:v[0-9]+]], 0x80000000, [[CVTA]] + +; VI-DAG: v_cvt_f32_f16_e64 [[FABS_FNEG:v[0-9]+]], -|[[A]]| +; VI-DAG: v_mul_f16_e64 [[MUL:v[0-9]+]], -|[[A]]|, [[A]] + +; GCN: buffer_store_dword [[FABS_FNEG]] +; GCN: buffer_store_short [[MUL]] +define void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32( + float addrspace(1)* %r, + half addrspace(1)* %a) { +entry: + %a.val = load half, half addrspace(1)* %a + %a.fabs = call half @llvm.fabs.f16(half %a.val) + %a.fneg.fabs = fsub half -0.0, 
%a.fabs + %r.val = fpext half %a.fneg.fabs to float + %mul = fmul half %a.fneg.fabs, %a.val + store volatile float %r.val, float addrspace(1)* %r + store volatile half %mul, half addrspace(1)* undef + ret void +} + +declare half @llvm.fabs.f16(half) #1 + +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index 284fc53c824..c9905d5f7ff 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; GCN-LABEL: {{^}}fptrunc_f32_to_f16 +; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[R_F16]] @@ -16,7 +16,7 @@ entry: ret void } -; GCN-LABEL: {{^}}fptrunc_f64_to_f16 +; GCN-LABEL: {{^}}fptrunc_f64_to_f16: ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}} ; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}} ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]] @@ -32,7 +32,7 @@ entry: ret void } -; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16 +; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16: ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}} ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]] ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] @@ -51,7 +51,7 @@ entry: ret void } -; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16 +; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16: ; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}} ; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}} ; GCN: v_cvt_f32_f64_e32 
v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}} @@ -70,3 +70,56 @@ entry: store <2 x half> %r.val, <2 x half> addrspace(1)* %r ret void } + +; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16: +; GCN: buffer_load_dword v[[A_F32:[0-9]+]] +; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]] +; GCN: buffer_store_short v[[R_F16]] +; GCN: s_endpgm +define void @fneg_fptrunc_f32_to_f16( + half addrspace(1)* %r, + float addrspace(1)* %a) { +entry: + %a.val = load float, float addrspace(1)* %a + %a.fneg = fsub float -0.0, %a.val + %r.val = fptrunc float %a.fneg to half + store half %r.val, half addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16: +; GCN: buffer_load_dword v[[A_F32:[0-9]+]] +; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]| +; GCN: buffer_store_short v[[R_F16]] +; GCN: s_endpgm +define void @fabs_fptrunc_f32_to_f16( + half addrspace(1)* %r, + float addrspace(1)* %a) { +entry: + %a.val = load float, float addrspace(1)* %a + %a.fabs = call float @llvm.fabs.f32(float %a.val) + %r.val = fptrunc float %a.fabs to half + store half %r.val, half addrspace(1)* %r + ret void +} + +; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16: +; GCN: buffer_load_dword v[[A_F32:[0-9]+]] +; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]| +; GCN: buffer_store_short v[[R_F16]] +; GCN: s_endpgm +define void @fneg_fabs_fptrunc_f32_to_f16( + half addrspace(1)* %r, + float addrspace(1)* %a) { +entry: + %a.val = load float, float addrspace(1)* %a + %a.fabs = call float @llvm.fabs.f32(float %a.val) + %a.fneg.fabs = fsub float -0.0, %a.fabs + %r.val = fptrunc float %a.fneg.fabs to half + store half %r.val, half addrspace(1)* %r + ret void +} + +declare float @llvm.fabs.f32(float) #1 + +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll index 20c1d2310d3..413f3f337d3 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll +++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -31,9 
+31,10 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_same_add +; GCN-LABEL: {{^}}mac_f16_same_add: ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]] ; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}} + ; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]] ; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm @@ -63,9 +64,11 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_a -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN-LABEL: {{^}}mac_f16_neg_a: +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}} +; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm @@ -87,9 +90,10 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_b -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN-LABEL: {{^}}mac_f16_neg_b: +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}} +; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]] ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: s_endpgm @@ -111,9 +115,12 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_c -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} +; GCN-LABEL: {{^}}mac_f16_neg_c: +; SI: v_cvt_f32_f16_e32 +; SI-DAG: v_cvt_f32_f16_e32 +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}} + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} ; GCN: s_endpgm @@ -207,9 +214,11 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} +; 
GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math: +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}} +; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} ; GCN: s_endpgm @@ -231,9 +240,11 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} +; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math: +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}} +; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} ; GCN: s_endpgm @@ -255,9 +266,12 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}} +; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math: +; SI: v_cvt_f32_f16_e32 +; SI: v_cvt_f32_f16_e32 +; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}} +; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}} + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}} ; GCN: s_endpgm @@ -279,7 +293,7 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16 +; GCN-LABEL: {{^}}mac_v2f16: ; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]] ; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]] ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]] @@ -322,7 +336,7 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16_same_add +; GCN-LABEL: {{^}}mac_v2f16_same_add: ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]] ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]] ; SI: v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}} @@ -358,10 +372,13 @@ entry: ret void } -; GCN-LABEL: 
{{^}}mac_v2f16_neg_a -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN-LABEL: {{^}}mac_v2f16_neg_a: +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} + +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]] +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -385,9 +402,12 @@ entry: } ; GCN-LABEL: {{^}}mac_v2f16_neg_b -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]] +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]] + + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -410,10 +430,13 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16_neg_c -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} +; GCN-LABEL: {{^}}mac_v2f16_neg_c: +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} + +; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}} +; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}} + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} @@ -464,7 +487,7 @@ entry: ret void } -; GCN-LABEL: 
{{^}}mac_v2f16_neg_b_safe_fp_math +; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math: ; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}} ; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} ; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} @@ -492,7 +515,7 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math +; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math: ; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}} ; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} ; SI: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}} @@ -520,10 +543,13 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} +; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math: +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} + +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]] +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} @@ -546,10 +572,13 @@ entry: ret void } -; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} -; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} +; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math: +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} + +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]] +; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]] + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}} @@ -572,10 +601,13 @@ entry: 
ret void } -; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math -; SI-NOT: v_mac_f32 -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}} -; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}} +; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math: +; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}} +; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}} + +; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}} +; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}} + ; VI-NOT: v_mac_f16 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}} ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}} diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_all.s b/llvm/test/MC/AMDGPU/gfx8_asm_all.s index 30e7eeeae12..fcf6e29acfd 100644 --- a/llvm/test/MC/AMDGPU/gfx8_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_all.s @@ -24330,14 +24330,16 @@ v_cvt_f32_f16_e64 v0, exec_hi v_cvt_f32_f16_e64 v0, 0 // CHECK: [0x00,0x00,0x4b,0xd1,0x80,0x00,0x00,0x00] +// FIXME: Parsing source modifiers v_cvt_f32_f16_e64 v0, -1 -// CHECK: [0x00,0x00,0x4b,0xd1,0xc1,0x00,0x00,0x00] +// CHECK: [0x00,0x00,0x4b,0xd1,0x81,0x00,0x00,0x20] v_cvt_f32_f16_e64 v0, 0.5 // CHECK: [0x00,0x00,0x4b,0xd1,0xf0,0x00,0x00,0x00] +// FIXME: Parsing source modifiers v_cvt_f32_f16_e64 v0, -4.0 -// CHECK: [0x00,0x00,0x4b,0xd1,0xf7,0x00,0x00,0x00] +// CHECK: [0x00,0x00,0x4b,0xd1,0xf6,0x00,0x00,0x20] v_cvt_f32_f16_e64 v0, v0 // CHECK: [0x00,0x00,0x4b,0xd1,0x00,0x01,0x00,0x00] |

