summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorFarhana Aleen <farhana.aleen@gmail.com>2018-11-01 22:48:19 +0000
committerFarhana Aleen <farhana.aleen@gmail.com>2018-11-01 22:48:19 +0000
commit5853762e5a6d02ef2e7279c418fbc94a6b1c7a21 (patch)
tree1ed5e61d5f472c155f39900455567026c58b086e /llvm/lib/Target
parent73ed607180abb6b075e863422c0530471163b54e (diff)
downloadbcm5719-llvm-5853762e5a6d02ef2e7279c418fbc94a6b1c7a21.tar.gz
bcm5719-llvm-5853762e5a6d02ef2e7279c418fbc94a6b1c7a21.zip
[AMDGPU] Handle the idot8 pattern generated by FE.
Summary: Different variants of idot8 codegen dag patterns are not generated by llvm-tablegen due to a huge increase in the compile time. Support the pattern that clang FE generates after reordering the additions in integer-dot8 source language pattern. Author: FarhanaAleen Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D53937 llvm-svn: 345902
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td9
1 files changed, 9 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index c91d911a283..2efd28b9cd8 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -287,6 +287,15 @@ foreach Type = ["U", "I"] in
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
+// in the compile time. Directly handle the pattern generated by the FE here.
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [7, 1, 2, 3, 4, 5, 6], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
} // End SubtargetPredicate = HasDLInsts
multiclass VOP3P_Real_vi<bits<10> op> {
OpenPOWER on IntegriCloud