summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td9
1 files changed, 9 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index c91d911a283..2efd28b9cd8 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -287,6 +287,15 @@ foreach Type = ["U", "I"] in
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
+// in the compile time. Directly handle the pattern generated by the FE here.
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [7, 1, 2, 3, 4, 5, 6], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
} // End SubtargetPredicate = HasDLInsts
multiclass VOP3P_Real_vi<bits<10> op> {
OpenPOWER on IntegriCloud