summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td | 12
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8
5 files changed, 22 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0aacedf24e3..a5b0eacb5d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -267,7 +267,13 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
def FeatureDLInsts : SubtargetFeature<"dl-insts",
"HasDLInsts",
"true",
- "Has deep learning instructions"
+ "Has v_fmac_f32 and v_xnor_b32 instructions"
+>;
+
+def FeatureDotInsts : SubtargetFeature<"dot-insts",
+ "HasDotInsts",
+ "true",
+ "Has v_dot* instructions"
>;
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
@@ -558,6 +564,7 @@ def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
+ FeatureDotInsts,
FeatureSRAMECC,
FeatureCodeObjectV3]>;
@@ -756,6 +763,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<"FeatureDLInsts">;
+def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
+ AssemblerPredicate<"FeatureDotInsts">;
+
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index f1acd72b03a..10b7f44a2c1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -198,6 +198,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP(false),
HasR128A16(false),
HasDLInsts(false),
+ HasDotInsts(false),
EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 886aca42b6c..51be81a7a81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -353,6 +353,7 @@ protected:
bool HasDPP;
bool HasR128A16;
bool HasDLInsts;
+ bool HasDotInsts;
bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
@@ -680,6 +681,10 @@ public:
return HasDLInsts;
}
+ bool hasDotInsts() const {
+ return HasDotInsts;
+ }
+
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0e2e3b04d87..6374792fee8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8385,7 +8385,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
- if (!Subtarget->hasDLInsts() || VT != MVT::f32)
+ if (!Subtarget->hasDotInsts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 0d25a86da32..91b45583c84 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -250,7 +250,7 @@ class SDot2Pat<Instruction Inst> : GCNPat <
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
>;
-let SubtargetPredicate = HasDLInsts in {
+let SubtargetPredicate = HasDotInsts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
@@ -302,7 +302,7 @@ foreach Type = ["U", "I"] in
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
-} // End SubtargetPredicate = HasDLInsts
+} // End SubtargetPredicate = HasDotInsts
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -352,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
}
-let SubtargetPredicate = HasDLInsts in {
+let SubtargetPredicate = HasDotInsts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
@@ -362,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
-} // End SubtargetPredicate = HasDLInsts
+} // End SubtargetPredicate = HasDotInsts
OpenPOWER on IntegriCloud