diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP3PInstructions.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 42 |
1 file changed, 29 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 3c3ac93956f..be17f137108 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -238,29 +238,39 @@ class UDot2Pat<Instruction Inst> : GCNPat < (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)), (and i32:$src1, (i32 65535))) ), - (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0)) ->; + (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> { + let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate; +} class SDot2Pat<Instruction Inst> : GCNPat < (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)), (sra i32:$src1, (i32 16))), i32:$src2), (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16), (sext_inreg i32:$src1, i16))), - (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0)) ->; + (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> { + let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate; +} -let SubtargetPredicate = HasDotInsts in { +let SubtargetPredicate = HasDot2Insts in { def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>; def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>; def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>; -def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; -def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; +} // End SubtargetPredicate = HasDot2Insts + +let SubtargetPredicate = HasDot1Insts in { + +def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; +def V_DOT8_I32_I4 : 
VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; + +} // End SubtargetPredicate = HasDot1Insts + multiclass DotPats<SDPatternOperator dot_op, VOP3PInst dot_inst> { + let SubtargetPredicate = dot_inst.SubtargetPredicate in def : GCNPat < (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)), @@ -280,12 +290,14 @@ def : UDot2Pat<V_DOT2_U32_U16>; def : SDot2Pat<V_DOT2_I32_I16>; foreach Type = ["U", "I"] in + let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in def : GCNPat < !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y, (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))), (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>; foreach Type = ["U", "I"] in + let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in def : GCNPat < !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)), [1, 2, 3, 4, 5, 6, 7], lhs, y, @@ -295,14 +307,13 @@ foreach Type = ["U", "I"] in // Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase // in the compile time. Directly handle the pattern generated by the FE here. 
foreach Type = ["U", "I"] in + let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in def : GCNPat < !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)), [7, 1, 2, 3, 4, 5, 6], lhs, y, (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))), (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>; -} // End SubtargetPredicate = HasDotInsts - multiclass VOP3P_Real_vi<bits<10> op> { def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> { @@ -351,14 +362,19 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; } -let SubtargetPredicate = HasDotInsts in { +let SubtargetPredicate = HasDot2Insts in { defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>; defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>; defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>; -defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>; defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>; -defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>; defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>; -} // End SubtargetPredicate = HasDotInsts +} // End SubtargetPredicate = HasDot2Insts + +let SubtargetPredicate = HasDot1Insts in { + +defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>; +defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>; + +} // End SubtargetPredicate = HasDot1Insts |

