diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 42 | 
5 files changed, 56 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c67d203d35d..e37fd0a2481 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -269,10 +269,16 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
   "Has v_fmac_f32 and v_xnor_b32 instructions"
 >;
 
-def FeatureDotInsts : SubtargetFeature<"dot-insts",
-  "HasDotInsts",
+def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
+  "HasDot1Insts",
   "true",
-  "Has v_dot* instructions"
+  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
+>;
+
+def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
+  "HasDot2Insts",
+  "true",
+  "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
 >;
 
 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
@@ -570,7 +576,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
    FeatureFmaMixInsts,
    FeatureLDSBankCount32,
    FeatureDLInsts,
-   FeatureDotInsts,
+   FeatureDot1Insts,
+   FeatureDot2Insts,
    FeatureSRAMECC,
    FeatureCodeObjectV3]>;
 
@@ -769,8 +776,11 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
 def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
   AssemblerPredicate<"FeatureDLInsts">;
 
-def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
-  AssemblerPredicate<"FeatureDotInsts">;
+def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
+  AssemblerPredicate<"FeatureDot1Insts">;
+
+def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
+  AssemblerPredicate<"FeatureDot2Insts">;
 
 
 def EnableLateCFGStructurize : Predicate<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index c08d1aa1e8a..cdfbfb5154e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -206,7 +206,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasDPP(false),
     HasR128A16(false),
     HasDLInsts(false),
-    HasDotInsts(false),
+    HasDot1Insts(false),
+    HasDot2Insts(false),
     EnableSRAMECC(false),
     FlatAddressSpace(false),
     FlatInstOffsets(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 5bc14f6419f..789fd2af4b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -332,7 +332,8 @@ protected:
   bool HasDPP;
   bool HasR128A16;
   bool HasDLInsts;
-  bool HasDotInsts;
+  bool HasDot1Insts;
+  bool HasDot2Insts;
   bool EnableSRAMECC;
   bool FlatAddressSpace;
   bool FlatInstOffsets;
@@ -666,8 +667,12 @@ public:
     return HasDLInsts;
   }
 
-  bool hasDotInsts() const {
-    return HasDotInsts;
+  bool hasDot1Insts() const {
+    return HasDot1Insts;
+  }
+
+  bool hasDot2Insts() const {
+    return HasDot2Insts;
   }
 
   bool isSRAMECCEnabled() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d6abd183105..17332edcbce 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8708,7 +8708,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SDLoc SL(N);
 
-  if (!Subtarget->hasDotInsts() || VT != MVT::f32)
+  if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
     return SDValue();
 
   // FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 3c3ac93956f..be17f137108 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -238,29 +238,39 @@ class UDot2Pat<Instruction Inst> : GCNPat <
        (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                              (and i32:$src1, (i32 65535)))
    ),
-  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
 
 class SDot2Pat<Instruction Inst> : GCNPat <
   (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                          (sra i32:$src1, (i32 16))), i32:$src2),
        (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                              (sext_inreg i32:$src1, i16))),
-  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
 
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
 
 def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
 def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
 def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
-def V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 def V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
-def V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 def V_DOT8_U32_U4  : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+def V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+} // End SubtargetPredicate = HasDot1Insts
+
 multiclass DotPats<SDPatternOperator dot_op,
                    VOP3PInst dot_inst> {
+  let SubtargetPredicate = dot_inst.SubtargetPredicate in
   def : GCNPat <
     (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
             (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
@@ -280,12 +290,14 @@ def : UDot2Pat<V_DOT2_U32_U16>;
 def : SDot2Pat<V_DOT2_I32_I16>;
 
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                       (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
     (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
 
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                       [1, 2, 3, 4, 5, 6, 7], lhs, y,
@@ -295,14 +307,13 @@ foreach Type = ["U", "I"] in
 // Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
 // in the compile time. Directly handle the pattern generated by the FE here.
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                       [7, 1, 2, 3, 4, 5, 6], lhs, y,
                       (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
     (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
 
-} // End SubtargetPredicate = HasDotInsts
-
 multiclass VOP3P_Real_vi<bits<10> op> {
   def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
             VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
@@ -351,14 +362,19 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
 
 }
 
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
 
 defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
 defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
 defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
-defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x3a8>;
 defm V_DOT4_U32_U8  : VOP3P_Real_vi <0x3a9>;
-defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x3aa>;
 defm V_DOT8_U32_U4  : VOP3P_Real_vi <0x3ab>;
 
-} // End SubtargetPredicate = HasDotInsts
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x3a8>;
+defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x3aa>;
+
+} // End SubtargetPredicate = HasDot1Insts

