diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/reduction.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/reduction.ll | 112 |
1 files changed, 91 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll index 74ca4a668f9..0c605f79d98 100644 --- a/llvm/test/CodeGen/AMDGPU/reduction.ll +++ b/llvm/test/CodeGen/AMDGPU/reduction.ll @@ -434,12 +434,23 @@ entry: } ; GCN-LABEL: {{^}}reduction_maxnum_v4f16: -; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 - -; VI: v_max_f16_sdwa -; VI-NEXT: v_max_f16_e32 -; VI-NEXT: v_max_f16_e32 +; GFX9: s_waitcnt +; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 +; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 +; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} + +; FIXME: Extra canonicalize leftover +; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]] + +; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0 +; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1 + +; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]] +; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]] +; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_maxnum_v4f16(<4 x half> %vec4) { entry: %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> @@ -451,12 +462,24 @@ entry: } ; GCN-LABEL: {{^}}reduction_minnum_v4f16: -; GFX9: v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9: s_waitcnt +; GFX9-NEXT: v_pk_max_f16 
[[CANON1:v[0-9]+]], v1, v1 +; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 +; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} + +; FIXME: Extra canonicalize leftover +; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]] -; VI: v_min_f16_sdwa -; VI-NEXT: v_min_f16_e32 -; VI-NEXT: v_min_f16_e32 + +; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0 +; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1 + +; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]] +; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]] +; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_minnum_v4f16(<4 x half> %vec4) { entry: %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> @@ -467,13 +490,36 @@ entry: ret half %res } +; FIXME: Need to preserve fast math flags when fmaxnum matched +; directly from the IR to avoid unnecessary quieting. 
+ ; GCN-LABEL: {{^}}reduction_fast_max_pattern_v4f16: -; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; XGFX9: v_pk_max_f16 [[MAX:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; XGFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 + +; XVI: s_waitcnt +; XVI-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; XVI-NEXT: v_max_f16_e32 v0, v0, v1 +; XVI-NEXT: v_max_f16_e32 v0, v0, v2 +; XVI-NEXT: s_setpc_b64 -; VI: v_max_f16_sdwa -; VI-NEXT: v_max_f16_e32 -; VI-NEXT: v_max_f16_e32 +; GFX9: s_waitcnt +; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 +; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 +; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} + +; FIXME: Extra canonicalize leftover +; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]] + +; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0 +; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1 + +; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]] +; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]] +; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) { entry: %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> @@ -486,13 +532,37 @@ entry: ret half %res } +; FIXME: Need to preserve fast math flags when fminnum matched +; directly 
from the IR to avoid unnecessary quieting. + ; GCN-LABEL: {{^}}reduction_fast_min_pattern_v4f16: -; GFX9: v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; XGFX9: v_pk_min_f16 [[MIN:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; XGFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 + +; XVI: s_waitcnt +; XVI-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; XVI-NEXT: v_min_f16_e32 v0, v0, v1 +; XVI-NEXT: v_min_f16_e32 v0, v0, v2 +; XVI-NEXT: s_setpc_b64 + +; GFX9: s_waitcnt +; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 +; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 +; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} + +; FIXME: Extra canonicalize leftover +; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]] + + +; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0 +; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1 -; VI: v_min_f16_sdwa -; VI-NEXT: v_min_f16_e32 -; VI-NEXT: v_min_f16_e32 +; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]] +; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]] +; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) { entry: %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> |

