diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8 |
5 files changed, 22 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 0aacedf24e3..a5b0eacb5d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -267,7 +267,13 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem", def FeatureDLInsts : SubtargetFeature<"dl-insts", "HasDLInsts", "true", - "Has deep learning instructions" + "Has v_fmac_f32 and v_xnor_b32 instructions" +>; + +def FeatureDotInsts : SubtargetFeature<"dot-insts", + "HasDotInsts", + "true", + "Has v_dot* instructions" >; def FeatureSRAMECC : SubtargetFeature<"sram-ecc", @@ -558,6 +564,7 @@ def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6, FeatureFmaMixInsts, FeatureLDSBankCount32, FeatureDLInsts, + FeatureDotInsts, FeatureSRAMECC, FeatureCodeObjectV3]>; @@ -756,6 +763,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">, def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">, AssemblerPredicate<"FeatureDLInsts">; +def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">, + AssemblerPredicate<"FeatureDotInsts">; + def EnableLateCFGStructurize : Predicate< "EnableLateStructurizeCFG">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index f1acd72b03a..10b7f44a2c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -198,6 +198,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasDPP(false), HasR128A16(false), HasDLInsts(false), + HasDotInsts(false), EnableSRAMECC(false), FlatAddressSpace(false), FlatInstOffsets(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 886aca42b6c..51be81a7a81 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -353,6 +353,7 @@ protected: bool HasDPP; bool HasR128A16; bool HasDLInsts; + bool HasDotInsts; bool EnableSRAMECC; bool FlatAddressSpace; bool FlatInstOffsets; @@ -680,6 +681,10 @@ public: return HasDLInsts; } + bool hasDotInsts() const { + return HasDotInsts; + } + bool isSRAMECCEnabled() const { return EnableSRAMECC; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0e2e3b04d87..6374792fee8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8385,7 +8385,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N, EVT VT = N->getValueType(0); SDLoc SL(N); - if (!Subtarget->hasDLInsts() || VT != MVT::f32) + if (!Subtarget->hasDotInsts() || VT != MVT::f32) return SDValue(); // FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) -> diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 0d25a86da32..91b45583c84 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -250,7 +250,7 @@ class SDot2Pat<Instruction Inst> : GCNPat < (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0)) >; -let SubtargetPredicate = HasDLInsts in { +let SubtargetPredicate = HasDotInsts in { def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>; def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>; @@ -302,7 +302,7 @@ foreach Type = ["U", "I"] in (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))), (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>; -} // End SubtargetPredicate = HasDLInsts +} // End SubtargetPredicate = HasDotInsts multiclass VOP3P_Real_vi<bits<10> op> { def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, @@ -352,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; } -let SubtargetPredicate = HasDLInsts in { +let SubtargetPredicate = HasDotInsts in { defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>; defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>; @@ -362,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>; defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>; defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>; -} // End SubtargetPredicate = HasDLInsts +} // End SubtargetPredicate = HasDotInsts |

