diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 19 |
2 files changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e970f0bec89..0df268d1f99 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -589,6 +589,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMUL, MVT::v4f16, Custom); setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom); setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom); setOperationAction(ISD::SELECT, MVT::v4i16, Custom); setOperationAction(ISD::SELECT, MVT::v4f16, Custom); @@ -3575,6 +3576,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerDEBUGTRAP(Op, DAG); case ISD::FABS: case ISD::FNEG: + case ISD::FCANONICALIZE: return splitUnaryVectorOp(Op, DAG); case ISD::SHL: case ISD::SRA: diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 52d891964c4..440904f9cd2 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -6,6 +6,7 @@ declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0 +declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0 declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -476,6 +477,24 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> a ret void } +; GCN-LABEL: {{^}}v_test_canonicalize_var_v4f16: +; GFX9: s_waitcnt +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: s_setpc_b64 + +; VI-DAG: v_max_f16_sdwa [[CANON_ELT3:v[0-9]+]], v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON_ELT2:v[0-9]+]], v1, v1 +; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0 +; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]] +; VI-DAG: v_or_b32_e32 v1, [[CANON_ELT2]], [[CANON_ELT3]] +; VI: s_setpc_b64 +define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 { + %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %val) + ret <4 x half> %canonicalized +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" } |