diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 52 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNProcessors.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 54 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 17 |
7 files changed, 147 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ad63561d957..4ff109b7938 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -378,6 +378,18 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts", "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDot5Insts : SubtargetFeature<"dot5-insts", + "HasDot5Insts", + "true", + "Has v_dot2c_f32_f16 instruction" +>; + +def FeatureDot6Insts : SubtargetFeature<"dot6-insts", + "HasDot6Insts", + "true", + "Has v_dot4c_i32_i8 instruction" +>; + def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", "DoesNotSupportSRAMECC", "true", @@ -773,6 +785,41 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; +def FeatureISAVersion10_1_1 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + +def FeatureISAVersion10_1_2 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureLdsMisalignedBug, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -1015,6 +1062,11 @@ def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">, def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">, AssemblerPredicate<"FeatureDot2Insts">; +def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, + AssemblerPredicate<"FeatureDot5Insts">; + +def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, + AssemblerPredicate<"FeatureDot6Insts">; def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6b12fb2a349..17222a77a0b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -234,6 +234,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasDLInsts(false), HasDot1Insts(false), HasDot2Insts(false), + HasDot5Insts(false), + HasDot6Insts(false), EnableSRAMECC(false), DoesNotSupportSRAMECC(false), HasNoSdstCMPX(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ca8dc8c07c6..f5584dc43f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -337,6 +337,8 @@ protected: bool HasDLInsts; bool HasDot1Insts; bool HasDot2Insts; + bool HasDot5Insts; + bool HasDot6Insts; bool EnableSRAMECC; bool DoesNotSupportSRAMECC; bool HasNoSdstCMPX; @@ -705,6 +707,14 @@ public: return HasDot2Insts; } + bool hasDot5Insts() const { + return HasDot5Insts; + } + + bool hasDot6Insts() const { + return HasDot6Insts; + } + bool isSRAMECCEnabled() const { return EnableSRAMECC; } diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 0de8feeeb46..bca8c8374bd 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -171,3 +171,11 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel, def : ProcessorModel<"gfx1010", GFX10SpeedModel, FeatureISAVersion10_1_0.Features >; + +def : ProcessorModel<"gfx1011", GFX10SpeedModel, + FeatureISAVersion10_1_1.Features +>; + +def : ProcessorModel<"gfx1012", GFX10SpeedModel, + FeatureISAVersion10_1_2.Features +>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index bab9f4df53b..9789d7312de 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,6 +93,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; } @@ -141,6 +143,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; + case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; + case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 53fd5a1c222..4ce181f3f5d 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -318,6 +318,20 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v def VOP_MAC_F16 : VOP_MAC <f16>; def VOP_MAC_F32 : VOP_MAC <f32>; +class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { + let HasClamp = 0; + let HasExtSDWA = 0; + let HasModifiers = 1; + let HasOpSel = 0; + let IsPacked = 0; +} + +def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> { + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; +} +def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>; + // Write out to vcc or arbitrary SGPR. def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { let Asm32 = "$vdst, vcc, $src0, $src1"; @@ -634,6 +648,31 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; } // End SubtargetPredicate = HasDLInsts +let Constraints = "$vdst = $src2", + DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in { + let SubtargetPredicate = HasDot5Insts in + defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + let SubtargetPredicate = HasDot6Insts in + defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; +} + +let AddedComplexity = 30 in { + def : GCNPat< + (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), + (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot5Insts; + } + def : GCNPat< + (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), + (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot6Insts; + } +} // End AddedComplexity = 30 + let SubtargetPredicate = isGFX10Plus in { def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; @@ -1492,3 +1531,18 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts + +multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : + VOP2_Real_e32_gfx10<op>, + VOP2_Real_dpp_gfx10<op>, + VOP2_Real_dpp8_gfx10<op>; + +let SubtargetPredicate = HasDot5Insts in { + // NB: Opcode conflicts with V_DOT8C_I32_I4 + // This opcode exists in gfx 10.1* only + defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; +} + +let SubtargetPredicate = HasDot6Insts in { + defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; +} diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 30bda397c03..43b6d2a2155 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -412,3 +412,20 @@ defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>; defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>; defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>; defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>; + +let SubtargetPredicate = HasDot2Insts in { + +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>; +defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>; +defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>; + +} // End SubtargetPredicate = HasDot2Insts + +let SubtargetPredicate = HasDot1Insts in { + +defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>; +defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>; + +} // End SubtargetPredicate = HasDot1Insts |