summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp2
-rw-r--r--llvm/lib/Support/TargetParser.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td52
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h10
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td8
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td54
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td17
9 files changed, 154 insertions, 1 deletions
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 8be86ab4707..c2a29f3ddad 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -412,6 +412,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index 09eb845ddff..0950d42ac78 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[34] = {
+constexpr GPUInfo AMDGCNGPUs[36] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
@@ -99,6 +99,8 @@ constexpr GPUInfo AMDGCNGPUs[34] = {
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -197,6 +199,8 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX906: return {9, 0, 6};
case GK_GFX909: return {9, 0, 9};
case GK_GFX1010: return {10, 1, 0};
+ case GK_GFX1011: return {10, 1, 1};
+ case GK_GFX1012: return {10, 1, 2};
default: return {0, 0, 0};
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ad63561d957..4ff109b7938 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -378,6 +378,18 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
+def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
+ "HasDot5Insts",
+ "true",
+ "Has v_dot2c_f32_f16 instruction"
+>;
+
+def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
+ "HasDot6Insts",
+ "true",
+ "Has v_dot4c_i32_i8 instruction"
+>;
+
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
"DoesNotSupportSRAMECC",
"true",
@@ -773,6 +785,41 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
+def FeatureISAVersion10_1_1 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize64,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3])>;
+
+def FeatureISAVersion10_1_2 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize64,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureLdsMisalignedBug,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3])>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -1015,6 +1062,11 @@ def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
AssemblerPredicate<"FeatureDot2Insts">;
+def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
+ AssemblerPredicate<"FeatureDot5Insts">;
+
+def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
+ AssemblerPredicate<"FeatureDot6Insts">;
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6b12fb2a349..17222a77a0b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -234,6 +234,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDLInsts(false),
HasDot1Insts(false),
HasDot2Insts(false),
+ HasDot5Insts(false),
+ HasDot6Insts(false),
EnableSRAMECC(false),
DoesNotSupportSRAMECC(false),
HasNoSdstCMPX(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ca8dc8c07c6..f5584dc43f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -337,6 +337,8 @@ protected:
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
+ bool HasDot5Insts;
+ bool HasDot6Insts;
bool EnableSRAMECC;
bool DoesNotSupportSRAMECC;
bool HasNoSdstCMPX;
@@ -705,6 +707,14 @@ public:
return HasDot2Insts;
}
+ bool hasDot5Insts() const {
+ return HasDot5Insts;
+ }
+
+ bool hasDot6Insts() const {
+ return HasDot6Insts;
+ }
+
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 0de8feeeb46..bca8c8374bd 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -171,3 +171,11 @@ def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
def : ProcessorModel<"gfx1010", GFX10SpeedModel,
FeatureISAVersion10_1_0.Features
>;
+
+def : ProcessorModel<"gfx1011", GFX10SpeedModel,
+ FeatureISAVersion10_1_1.Features
+>;
+
+def : ProcessorModel<"gfx1012", GFX10SpeedModel,
+ FeatureISAVersion10_1_2.Features
+>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index bab9f4df53b..9789d7312de 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -93,6 +93,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -141,6 +143,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
+ case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
+ case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 53fd5a1c222..4ce181f3f5d 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -318,6 +318,20 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
+class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
+ let HasClamp = 0;
+ let HasExtSDWA = 0;
+ let HasModifiers = 1;
+ let HasOpSel = 0;
+ let IsPacked = 0;
+}
+
+def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
+ let Src0ModDPP = FPVRegInputMods;
+ let Src1ModDPP = FPVRegInputMods;
+}
+def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;
+
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
let Asm32 = "$vdst, vcc, $src0, $src1";
@@ -634,6 +648,31 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
} // End SubtargetPredicate = HasDLInsts
+let Constraints = "$vdst = $src2",
+ DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1,
+ isCommutable = 1 in {
+ let SubtargetPredicate = HasDot5Insts in
+ defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+ let SubtargetPredicate = HasDot6Insts in
+ defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
+}
+
+let AddedComplexity = 30 in {
+ def : GCNPat<
+ (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
+ (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot5Insts;
+ }
+ def : GCNPat<
+ (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+ (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot6Insts;
+ }
+} // End AddedComplexity = 30
+
let SubtargetPredicate = isGFX10Plus in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
@@ -1492,3 +1531,18 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
} // End SubtargetPredicate = HasDLInsts
+
+multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
+ VOP2_Real_e32_gfx10<op>,
+ VOP2_Real_dpp_gfx10<op>,
+ VOP2_Real_dpp8_gfx10<op>;
+
+let SubtargetPredicate = HasDot5Insts in {
+ // NB: Opcode conflicts with V_DOT8C_I32_I4
+ // This opcode exists in gfx 10.1* only
+ defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+}
+
+let SubtargetPredicate = HasDot6Insts in {
+ defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
+}
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 30bda397c03..43b6d2a2155 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -412,3 +412,20 @@ defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
+
+let SubtargetPredicate = HasDot2Insts in {
+
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
+defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
+defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
+defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>;
+defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>;
+
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>;
+defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>;
+
+} // End SubtargetPredicate = HasDot1Insts
OpenPOWER on IntegriCloud