diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/ObjectYAML/ELFYAML.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Support/TargetParser.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 52 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNProcessors.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 54 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 17 |
9 files changed, 154 insertions, 1 deletions
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 8be86ab4707..c2a29f3ddad 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -412,6 +412,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH); BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH); + BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH); BCase(EF_AMDGPU_XNACK); BCase(EF_AMDGPU_SRAM_ECC); break; diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index 09eb845ddff..0950d42ac78 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = { // This table should be sorted by the value of GPUKind // Don't bother listing the implicitly true features -constexpr GPUInfo AMDGCNGPUs[34] = { +constexpr GPUInfo AMDGCNGPUs[36] = { // Name Canonical Kind Features // Name {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, @@ -99,6 +99,8 @@ constexpr GPUInfo AMDGCNGPUs[34] = { {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, + {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, }; const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { @@ -197,6 +199,8 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { case GK_GFX906: return {9, 0, 6}; case GK_GFX909: return {9, 0, 9}; case GK_GFX1010: return {10, 1, 0}; + case GK_GFX1011: return {10, 1, 1}; + case GK_GFX1012: return {10, 1, 2}; default: return {0, 0, 0}; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ad63561d957..4ff109b7938 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -378,6 +378,18 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts", "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDot5Insts : SubtargetFeature<"dot5-insts", + "HasDot5Insts", + "true", + "Has v_dot2c_f32_f16 instruction" +>; + +def FeatureDot6Insts : SubtargetFeature<"dot6-insts", + "HasDot6Insts", + "true", + "Has v_dot4c_i32_i8 instruction" +>; + def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", "DoesNotSupportSRAMECC", "true", @@ -773,6 +785,41 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; +def FeatureISAVersion10_1_1 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + +def FeatureISAVersion10_1_2 : FeatureSet< + !listconcat(FeatureGroup.GFX10_1_Bugs, + [FeatureGFX10, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot1Insts, + FeatureDot2Insts, + FeatureDot5Insts, + FeatureDot6Insts, + FeatureNSAEncoding, + FeatureWavefrontSize64, + FeatureScalarStores, + FeatureScalarAtomics, + FeatureScalarFlatScratchInsts, + FeatureLdsMisalignedBug, + FeatureDoesNotSupportXNACK, + FeatureCodeObjectV3])>; + //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -1015,6 +1062,11 @@ def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">, def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">, AssemblerPredicate<"FeatureDot2Insts">; +def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, + AssemblerPredicate<"FeatureDot5Insts">; + +def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, + AssemblerPredicate<"FeatureDot6Insts">; def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6b12fb2a349..17222a77a0b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -234,6 +234,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasDLInsts(false), HasDot1Insts(false), HasDot2Insts(false), + HasDot5Insts(false), + HasDot6Insts(false), EnableSRAMECC(false), DoesNotSupportSRAMECC(false), HasNoSdstCMPX(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index ca8dc8c07c6..f5584dc43f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -337,6 +337,8 @@ protected: bool HasDLInsts; bool HasDot1Insts; bool HasDot2Insts; + bool HasDot5Insts; + bool HasDot6Insts; bool EnableSRAMECC; bool DoesNotSupportSRAMECC; bool HasNoSdstCMPX; @@ -705,6 +707,14 @@ public: return HasDot2Insts; } + bool hasDot5Insts() const { + return HasDot5Insts; + } + + bool hasDot6Insts() const { + return HasDot6Insts; + } + bool isSRAMECCEnabled() const { return EnableSRAMECC; } diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 0de8feeeb46..bca8c8374bd 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -171,3 +171,11 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel, def : ProcessorModel<"gfx1010", GFX10SpeedModel, FeatureISAVersion10_1_0.Features >; + +def : ProcessorModel<"gfx1011", GFX10SpeedModel, + FeatureISAVersion10_1_1.Features +>; + +def : ProcessorModel<"gfx1012", GFX10SpeedModel, + FeatureISAVersion10_1_2.Features +>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index bab9f4df53b..9789d7312de 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,6 +93,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; } @@ -141,6 +143,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; + case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; + case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 53fd5a1c222..4ce181f3f5d 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -318,6 +318,20 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v def VOP_MAC_F16 : VOP_MAC <f16>; def VOP_MAC_F32 : VOP_MAC <f32>; +class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { + let HasClamp = 0; + let HasExtSDWA = 0; + let HasModifiers = 1; + let HasOpSel = 0; + let IsPacked = 0; +} + +def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> { + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; +} +def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>; + // Write out to vcc or arbitrary SGPR. def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { let Asm32 = "$vdst, vcc, $src0, $src1"; @@ -634,6 +648,31 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; } // End SubtargetPredicate = HasDLInsts +let Constraints = "$vdst = $src2", + DisableEncoding="$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in { + let SubtargetPredicate = HasDot5Insts in + defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + let SubtargetPredicate = HasDot6Insts in + defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; +} + +let AddedComplexity = 30 in { + def : GCNPat< + (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), + (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot5Insts; + } + def : GCNPat< + (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), + (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) + > { + let SubtargetPredicate = HasDot6Insts; + } +} // End AddedComplexity = 30 + let SubtargetPredicate = isGFX10Plus in { def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; @@ -1492,3 +1531,18 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts + +multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : + VOP2_Real_e32_gfx10<op>, + VOP2_Real_dpp_gfx10<op>, + VOP2_Real_dpp8_gfx10<op>; + +let SubtargetPredicate = HasDot5Insts in { + // NB: Opcode conflicts with V_DOT8C_I32_I4 + // This opcode exists in gfx 10.1* only + defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; +} + +let SubtargetPredicate = HasDot6Insts in { + defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; +} diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 30bda397c03..43b6d2a2155 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -412,3 +412,20 @@ defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>; defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>; defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>; defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>; + +let SubtargetPredicate = HasDot2Insts in { + +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>; +defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>; +defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>; + +} // End SubtargetPredicate = HasDot2Insts + +let SubtargetPredicate = HasDot1Insts in { + +defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>; +defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>; + +} // End SubtargetPredicate = HasDot1Insts |