Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fabs.f16.ll        |  25
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fcmp.f16.ll        |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll   |  53
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg.f16.ll        |  13
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fpext.f16.ll       | 199
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll     |  61
-rw-r--r--  llvm/test/CodeGen/AMDGPU/v_mac_f16.ll       | 124
7 files changed, 375 insertions, 106 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index c64aa6228c7..77c941356b9 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -19,8 +19,7 @@ define void @fabs_free_f16(half addrspace(1)* %out, i16 %in) {
 
 ; GCN-LABEL: {{^}}fabs_f16:
 ; CI: flat_load_ushort [[VAL:v[0-9]+]],
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
-; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]|
+; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]]
 ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 define void @fabs_f16(half addrspace(1)* %out, half %in) {
   %fabs = call half @llvm.fabs.f16(half %in)
@@ -30,10 +29,10 @@ define void @fabs_f16(half addrspace(1)* %out, half %in) {
 
 ; FIXME: Should be able to use single and
 ; GCN-LABEL: {{^}}fabs_v2f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+
+; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 
 ; VI: flat_load_ushort [[LO:v[0-9]+]]
 ; VI: flat_load_ushort [[HI:v[0-9]+]]
@@ -51,10 +50,11 @@ define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
 }
 
 ; GCN-LABEL: {{^}}fabs_v4f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
 ; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
@@ -72,9 +72,10 @@ define void @fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
 
 ; GCN-LABEL: {{^}}fabs_fold_f16:
 ; GCN: flat_load_ushort [[IN0:v[0-9]+]]
 ; GCN: flat_load_ushort [[IN1:v[0-9]+]]
+
 ; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
-; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]]
-; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]]
+; CI-DAG: v_cvt_f32_f16_e64 [[ABS_CVT1:v[0-9]+]], |[[IN1]]|
+; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[CVT0]], [[ABS_CVT1]]
 ; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
 ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]
diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
index a62726f7f06..8a01ea2fe43 100644
--- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -28,10 +28,10 @@ entry:
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
 
-; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
+; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]|
+; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]|
 
-; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
+; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
 
 ; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|
 ; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index d7d21311c1b..240fd071000 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -3,8 +3,8 @@
 
 ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
 ; CI: v_cvt_f32_f16_e32
-; CI: v_cvt_f32_f16_e32
-; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
+; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}
 
 ; VI-NOT: and
 ; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
@@ -17,14 +17,15 @@ define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_fmul_f16:
-; CI: v_cvt_f32_f16_e32
-; CI: v_cvt_f32_f16_e32
-; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
+; CI-DAG: v_cvt_f32_f16_e32
+; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}|
+; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
 ; CI: v_cvt_f16_f32_e32
 
 ; VI-NOT: and
-; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
-; VI-NOT: and
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
+; VI-NOT: [[MUL]]
+; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
   %fabs = call half @llvm.fabs.f16(half %x)
   %fsub = fsub half -0.000000e+00, %fabs
@@ -49,10 +50,7 @@ define void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
 
 ; FIXME: Should use or
 ; GCN-LABEL: {{^}}fneg_fabs_f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
 define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
   %fabs = call half @llvm.fabs.f16(half %in)
   %fsub = fsub half -0.000000e+00, %fabs
@@ -61,10 +59,7 @@ define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
 }
 
 ; GCN-LABEL: {{^}}v_fneg_fabs_f16:
-; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
 define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
   %val = load half, half addrspace(1)* %in, align 2
   %fabs = call half @llvm.fabs.f16(half %val)
@@ -75,13 +70,10 @@ define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
 
 ; FIXME: single bit op
 ; GCN-LABEL: {{^}}fneg_fabs_v2f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: flat_store_dword
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: store_dword
 define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs
@@ -90,17 +82,12 @@ define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_v4f16:
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
-
-; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: flat_store_dwordx2
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; GCN: store_dwordx2
 define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
   %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
   %fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
index e3dfd9201a2..d545cc789d8 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -15,13 +15,9 @@ define void @s_fneg_f16(half addrspace(1)* %out, half %in) {
 
 ; FUNC-LABEL: {{^}}v_fneg_f16:
 ; GCN: flat_load_ushort [[VAL:v[0-9]+]],
-
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
-; CI: v_cvt_f16_f32_e64 [[CVT1:v[0-9]+]], -[[CVT0]]
-; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
-
-; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
+; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
 ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
+; SI: buffer_store_short [[XOR]]
 define void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
   %val = load half, half addrspace(1)* %in, align 2
   %fneg = fsub half -0.000000e+00, %val
@@ -45,8 +41,9 @@ define void @fneg_free_f16(half addrspace(1)* %out, i16 %in) {
 
 ; FUNC-LABEL: {{^}}v_fneg_fold_f16:
 ; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]
-; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[CVT0]]
-; CI: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[CVT0]], [[CVT0]]
+; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
+; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
+; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]]
 ; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
 ; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
index c4f5d7cdfb5..433fdf1e075 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
@@ -68,3 +68,202 @@ entry:
   store <2 x double> %r.val, <2 x double> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32:
+; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+define void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) {
+entry:
+  %a.trunc = trunc i32 %a to i16
+  %a.val = bitcast i16 %a.trunc to half
+  %r.val = fpext half %a.val to float
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]]
+define void @fneg_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.neg = fsub half -0.0, %a.val
+  %r.val = fpext half %a.neg to float
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]|
+define void @fabs_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %r.val = fpext half %a.fabs to float
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]|
+define void @fneg_fabs_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %a.fneg.fabs = fsub half -0.0, %a.fabs
+  %r.val = fpext half %a.fneg.fabs to float
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[A]]
+
+; FIXME: Using the source modifier here only wastes code size
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
+
+; GCN: store_dword [[CVT]]
+; GCN: store_short [[XOR]]
+define void @fneg_multi_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.neg = fsub half -0.0, %a.val
+  %r.val = fpext half %a.neg to float
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %a.neg, half addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]]
+; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]]
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], -[[A]], [[A]]
+
+; GCN: buffer_store_dword [[CVTA_NEG]]
+; GCN: buffer_store_short [[MUL]]
+define void @fneg_multi_foldable_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.neg = fsub half -0.0, %a.val
+  %r.val = fpext half %a.neg to float
+  %mul = fmul half %a.neg, %a.val
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %mul, half addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]]
+
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|
+
+; GCN: store_dword [[CVT]]
+; GCN: store_short [[XOR]]
+define void @fabs_multi_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %r.val = fpext half %a.fabs to float
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %a.fabs, half addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], |[[CVTA]]|, [[CVTA]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[CVTA]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[ABS_A:v[0-9]+]], |[[A]]|
+; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], |[[A]]|, [[A]]
+
+; GCN: buffer_store_dword [[ABS_A]]
+; GCN: buffer_store_short [[MUL]]
+define void @fabs_multi_foldable_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %r.val = fpext half %a.fabs to float
+  %mul = fmul half %a.fabs, %a.val
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %mul, half addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fneg_multi_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]]
+
+; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]]
+; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|
+
+; GCN: buffer_store_dword [[CVT]]
+; GCN: buffer_store_short [[OR]]
+define void @fabs_fneg_multi_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %a.fneg.fabs = fsub half -0.0, %a.fabs
+  %r.val = fpext half %a.fneg.fabs to float
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %a.fneg.fabs, half addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fneg_multi_foldable_use_fpext_f16_to_f32:
+; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
+; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], -|[[CVTA]]|, [[CVTA]]
+; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
+; SI: v_or_b32_e32 [[FABS_FNEG:v[0-9]+]], 0x80000000, [[CVTA]]
+
+; VI-DAG: v_cvt_f32_f16_e64 [[FABS_FNEG:v[0-9]+]], -|[[A]]|
+; VI-DAG: v_mul_f16_e64 [[MUL:v[0-9]+]], -|[[A]]|, [[A]]
+
+; GCN: buffer_store_dword [[FABS_FNEG]]
+; GCN: buffer_store_short [[MUL]]
+define void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32(
+    float addrspace(1)* %r,
+    half addrspace(1)* %a) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %a.fabs = call half @llvm.fabs.f16(half %a.val)
+  %a.fneg.fabs = fsub half -0.0, %a.fabs
+  %r.val = fpext half %a.fneg.fabs to float
+  %mul = fmul half %a.fneg.fabs, %a.val
+  store volatile float %r.val, float addrspace(1)* %r
+  store volatile half %mul, half addrspace(1)* undef
+  ret void
+}
+
+declare half @llvm.fabs.f16(half) #1
+
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 284fc53c824..c9905d5f7ff 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-; GCN-LABEL: {{^}}fptrunc_f32_to_f16
+; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
 ; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
 ; GCN: buffer_store_short v[[R_F16]]
@@ -16,7 +16,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}fptrunc_f64_to_f16
+; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
 ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
 ; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
@@ -32,7 +32,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16
+; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
 ; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
 ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
 ; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
@@ -51,7 +51,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16
+; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
 ; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
 ; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
 ; GCN: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
@@ -70,3 +70,56 @@ entry:
   store <2 x half> %r.val, <2 x half> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fneg_fptrunc_f32_to_f16(
+    half addrspace(1)* %r,
+    float addrspace(1)* %a) {
+entry:
+  %a.val = load float, float addrspace(1)* %a
+  %a.fneg = fsub float -0.0, %a.val
+  %r.val = fptrunc float %a.fneg to half
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fabs_fptrunc_f32_to_f16(
+    half addrspace(1)* %r,
+    float addrspace(1)* %a) {
+entry:
+  %a.val = load float, float addrspace(1)* %a
+  %a.fabs = call float @llvm.fabs.f32(float %a.val)
+  %r.val = fptrunc float %a.fabs to half
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
+; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
+; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
+; GCN: buffer_store_short v[[R_F16]]
+; GCN: s_endpgm
+define void @fneg_fabs_fptrunc_f32_to_f16(
+    half addrspace(1)* %r,
+    float addrspace(1)* %a) {
+entry:
+  %a.val = load float, float addrspace(1)* %a
+  %a.fabs = call float @llvm.fabs.f32(float %a.val)
+  %a.fneg.fabs = fsub float -0.0, %a.fabs
+  %r.val = fptrunc float %a.fneg.fabs to half
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+declare float @llvm.fabs.f32(float) #1
+
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index 20c1d2310d3..413f3f337d3 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -31,9 +31,10 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_same_add
+; GCN-LABEL: {{^}}mac_f16_same_add:
 ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
 ; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
+
 ; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
 ; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: s_endpgm
@@ -63,9 +64,11 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_a
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_a:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: s_endpgm
@@ -87,9 +90,10 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_b
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_b:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: s_endpgm
@@ -111,9 +115,12 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_c
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_f16_neg_c:
+; SI: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN: s_endpgm
@@ -207,9 +214,11 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN: s_endpgm
@@ -231,9 +240,11 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
+; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN: s_endpgm
@@ -255,9 +266,12 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
+; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e32
+; SI: v_cvt_f32_f16_e32
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
+; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
 ; GCN: s_endpgm
@@ -279,7 +293,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16
+; GCN-LABEL: {{^}}mac_v2f16:
 ; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
@@ -322,7 +336,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_same_add
+; GCN-LABEL: {{^}}mac_v2f16_same_add:
 ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
 ; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
 ; SI: v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
@@ -358,10 +372,13 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_a
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_a:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -385,9 +402,12 @@ entry:
   ret void
 }
 
 ; GCN-LABEL: {{^}}mac_v2f16_neg_b
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -410,10 +430,13 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_c
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_c:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
@@ -464,7 +487,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math:
 ; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
 ; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
@@ -492,7 +515,7 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math:
 ; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
 ; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; SI: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
@@ -520,10 +543,13 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
@@ -546,10 +572,13 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
+; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
@@ -572,10 +601,13 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math
-; SI-NOT: v_mac_f32
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
-; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
+; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
+; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
+
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
+; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
+
 ; VI-NOT: v_mac_f16
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
 ; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}