diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-08-02 13:43:53 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-08-02 13:43:53 +0000 |
commit | 1f3977a856a3313a028a2505ec9a59950204a5a1 (patch) | |
tree | aa70ddab0de1562c1dfe68a624c4360b5e3607b4 | |
parent | 36cdcfadcf57ff8f253b0053137f198187fe85c6 (diff) | |
download | bcm5719-llvm-1f3977a856a3313a028a2505ec9a59950204a5a1.tar.gz bcm5719-llvm-1f3977a856a3313a028a2505ec9a59950204a5a1.zip |
DAG: Fix vector widening fcanonicalize
llvm-svn: 338715
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 1 |
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 20 |
2 files changed, 21 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index a7ccb3c05ec..ab868c34805 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2394,6 +2394,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
+  case ISD::FCANONICALIZE:
     Res = WidenVecRes_Unary(N);
     break;
   case ISD::FMA:
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 440904f9cd2..a3022d1e0b9 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -6,6 +6,7 @@ declare half @llvm.fabs.f16(half) #0
 declare half @llvm.canonicalize.f16(half) #0
 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
+declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
 declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
@@ -477,6 +478,25 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> a
   ret void
 }
 
+; FIXME: Extra 4th component handled
+; GCN-LABEL: {{^}}v_test_canonicalize_var_v3f16:
+; GFX9: s_waitcnt
+; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX9-NEXT: s_setpc_b64
+
+; VI-DAG: v_max_f16_sdwa [[CANON_ELT3:v[0-9]+]], v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_e32 [[CANON_ELT2:v[0-9]+]], v1, v1
+; VI-DAG: v_max_f16_sdwa [[CANON_ELT1:v[0-9]+]], v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_e32 [[CANON_ELT0:v[0-9]+]], v0, v0
+; VI-DAG: v_or_b32_e32 v0, [[CANON_ELT0]], [[CANON_ELT1]]
+; VI-DAG: v_or_b32_e32 v1, [[CANON_ELT2]], [[CANON_ELT3]]
+; VI: s_setpc_b64
+define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 {
+  %canonicalized = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> %val)
+  ret <3 x half> %canonicalized
+}
+
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_v4f16:
 ; GFX9: s_waitcnt
 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v0