diff options
author | Farhana Aleen <farhana.aleen@gmail.com> | 2018-11-01 22:48:19 +0000 |
---|---|---|
committer | Farhana Aleen <farhana.aleen@gmail.com> | 2018-11-01 22:48:19 +0000 |
commit | 5853762e5a6d02ef2e7279c418fbc94a6b1c7a21 (patch) | |
tree | 1ed5e61d5f472c155f39900455567026c58b086e /llvm/lib/Target | |
parent | 73ed607180abb6b075e863422c0530471163b54e (diff) | |
download | bcm5719-llvm-5853762e5a6d02ef2e7279c418fbc94a6b1c7a21.tar.gz bcm5719-llvm-5853762e5a6d02ef2e7279c418fbc94a6b1c7a21.zip |
[AMDGPU] Handle the idot8 pattern generated by FE.
Summary: The different variants of the idot8 codegen DAG patterns are not generated by llvm-tblgen because doing so
causes a huge increase in compile time. Instead, directly support the pattern that the clang FE generates after
reordering the additions in the integer-dot8 source-language pattern.
Author: FarhanaAleen
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D53937
llvm-svn: 345902
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index c91d911a283..2efd28b9cd8 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -287,6 +287,15 @@ foreach Type = ["U", "I"] in
                       (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
     (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
 
+// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
+// in the compile time. Directly handle the pattern generated by the FE here.
+foreach Type = ["U", "I"] in
+  def : GCNPat <
+    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
+                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+    (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
 } // End SubtargetPredicate = HasDLInsts
 
 multiclass VOP3P_Real_vi<bits<10> op> {