diff options
author | Sam Kolton <Sam.Kolton@amd.com> | 2016-12-22 12:57:41 +0000 |
---|---|---|
committer | Sam Kolton <Sam.Kolton@amd.com> | 2016-12-22 12:57:41 +0000 |
commit | a568e3dde70d832fa8baa6ffd6ad6cb81dce8ac6 (patch) | |
tree | f097450be624e5a18aa615386d9f9a52a64dad57 | |
parent | 2388861f093c4f3cde7fb86814c01d8046c6824d (diff) | |
download | bcm5719-llvm-a568e3dde70d832fa8baa6ffd6ad6cb81dce8ac6.tar.gz bcm5719-llvm-a568e3dde70d832fa8baa6ffd6ad6cb81dce8ac6.zip |
[AMDGPU] Add pseudo SDWA instructions
Summary: This is needed for later SDWA support in CodeGen.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye
Differential Revision: https://reviews.llvm.org/D27412
llvm-svn: 290338
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 39 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 50 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPCInstructions.td | 62 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 81 |
5 files changed, 159 insertions, 85 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 68ab8d676ed..74b188d6b20 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1567,8 +1567,8 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { getForcedEncodingSize() != 64) return Match_PreferE32; - if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa || - Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa) { + if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || + Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { // v_mac_f32/16 allow only dst_sel == DWORD; auto OpNum = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); @@ -3445,8 +3445,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa) { - // V_NOP_sdwa has no optional sdwa arguments + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + // V_NOP_sdwa_vi has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); @@ -3473,8 +3473,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, // special case v_mac_{f16, f32}: // it has src2 register operand that is tied to dst operand - if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa || - Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa) { + if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || + Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { auto it = Inst.begin(); std::advance( it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 9d876c39674..bff706cdc1d 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -21,6 +21,15 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 { let Inst{31-25} = 0x3f; //encoding } +class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : InstSI <P.Outs32, P.Ins32, "", pattern>, VOP <opName>, @@ -68,6 +77,11 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> : let TSFlags = ps.TSFlags; } +class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOP1"; +} + class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, @@ -79,6 +93,7 @@ multiclass VOP1Inst <string opName, VOPProfile P, SDPatternOperator node = null_frag> { def _e32 : VOP1_Pseudo <opName, P>; def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; + def _sdwa : VOP1_SDWA_Pseudo <opName, P>; } //===----------------------------------------------------------------------===// @@ -422,23 +437,6 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>; // VI //===----------------------------------------------------------------------===// -class VOP1_SDWA <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> : - VOP_SDWA <ps.OpName, P> { - let Defs = ps.Defs; - let Uses = ps.Uses; - let SchedRW = ps.SchedRW; - let hasSideEffects = ps.hasSideEffects; - let Constraints = ps.Constraints; - let DisableEncoding = ps.DisableEncoding; - let AsmMatchConverter = "cvtSdwaVOP1"; - - bits<8> vdst; - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = op; - let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); - let Inst{31-25} = 0x3f; // encoding -} - class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> : VOP_DPP <ps.OpName, P> { let Defs = ps.Defs; @@ -465,9 +463,12 @@ multiclass VOP1_Real_vi <bits<10> op> { VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; } - // for now left sdwa/dpp only for asm/dasm + def _sdwa_vi : + VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; + + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo - def _sdwa : VOP1_SDWA<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>; def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index a3025ffe2b3..0b18e0021f7 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -37,6 +37,17 @@ class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 { let Inst{63-32} = imm; } +class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> { + bits<8> vdst; + bits<8> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding +} + class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> : InstSI <P.Outs32, P.Ins32, "", pattern>, VOP <opName>, @@ -84,6 +95,11 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> : let TSFlags = ps.TSFlags; } +class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOP2"; +} + class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { list<dag> ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -102,8 +118,12 @@ multiclass VOP2Inst <string opName, def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; + + def _sdwa : VOP2_SDWA_Pseudo <opName, P>, + Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>; } +// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst multiclass VOP2bInst <string opName, VOPProfile P, SDPatternOperator node = null_frag, @@ -554,25 +574,6 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>; // VI //===----------------------------------------------------------------------===// -class VOP2_SDWA <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> : - VOP_SDWA <ps.OpName, P> { - let Defs = ps.Defs; - let Uses = ps.Uses; - let SchedRW = ps.SchedRW; - let hasSideEffects = ps.hasSideEffects; - let Constraints = ps.Constraints; - let DisableEncoding = ps.DisableEncoding; - let AsmMatchConverter = "cvtSdwaVOP2"; - - bits<8> vdst; - bits<8> src1; - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); - let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); - let Inst{30-25} = op; - let Inst{31} = 0x0; // encoding -} - class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> : VOP_DPP <ps.OpName, P> { let Defs = ps.Defs; @@ -627,12 +628,17 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> : VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" + +multiclass VOP2_SDWA_Real <bits<6> op> { + def _sdwa_vi : + VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; +} multiclass VOP2_Real_e32e64_vi <bits<6> op> : - Base_VOP2_Real_e32e64_vi<op> { - // for now left sdwa/dpp only for asm/dasm + Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> { + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo - def _sdwa : VOP2_SDWA<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; } diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 32137888ef1..f042404acaf 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -21,6 +21,19 @@ class VOPCe <bits<8> op> : Enc32 { let Inst{31-25} = 0x3e; } +class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { + bits<8> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding + + // VOPC disallows dst_sel and dst_unused as they have no effect on destination + let Inst{42-40} = SDWA.DWORD; + let Inst{44-43} = SDWA.UNUSED_PRESERVE; +} + //===----------------------------------------------------------------------===// // VOPC classes //===----------------------------------------------------------------------===// @@ -82,6 +95,11 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> : let TSFlags = ps.TSFlags; } +class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : + VOP_SDWA_Pseudo <OpName, P, pattern> { + let AsmMatchConverter = "cvtSdwaVOPC"; +} + // This class is used only with VOPC instructions. Use $sdst for out operand class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> : InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl { @@ -131,6 +149,7 @@ multiclass VOPC_Pseudos <string opName, let isCompare = 1; let isCommutable = 1; } + def _e64 : VOP3_Pseudo<opName, P, !if(P.HasModifiers, [(set i1:$sdst, @@ -145,6 +164,15 @@ multiclass VOPC_Pseudos <string opName, let isCompare = 1; let isCommutable = 1; } + + def _sdwa : VOPC_SDWA_Pseudo <opName, P>, + Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = P.Schedule; + let isConvergent = DefExec; + let isCompare = 1; + let isCommutable = 1; + } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -471,10 +499,17 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> { let SchedRW = p.Schedule; let isConvergent = DefExec; } + def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; } + + def _sdwa : VOPC_SDWA_Pseudo <opName, p> { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = p.Schedule; + let isConvergent = DefExec; + } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -822,27 +857,6 @@ defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>; // VI //===----------------------------------------------------------------------===// -class VOPC_SDWA<bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> : - VOP_SDWA <ps.OpName, P> { - let Defs = ps.Defs; - let hasSideEffects = ps.hasSideEffects; - let AsmMatchConverter = "cvtSdwaVOPC"; - let isCompare = ps.isCompare; - let isCommutable = ps.isCommutable; - let Constraints = ps.Constraints; - let DisableEncoding = ps.DisableEncoding; - - bits<8> src1; - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); - let Inst{24-17} = op; - let Inst{31-25} = 0x3e; // encoding - - // VOPC disallows dst_sel and dst_unused as they have no effect on destination - let Inst{42-40} = SDWA_DWORD; - let Inst{44-43} = SDWA_UNUSED_PRESERVE; -} - multiclass VOPC_Real_vi <bits<10> op> { let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { def _e32_vi : @@ -859,9 +873,9 @@ multiclass VOPC_Real_vi <bits<10> op> { } } - // for now left sdwa only for asm/dasm - // TODO: add corresponding pseudo - def _sdwa : VOPC_SDWA<op{7-0}, !cast<VOPC_Pseudo>(NAME#"_e32")>; + def _sdwa_vi : + VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>, + VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"), !cast<Instruction>(NAME#"_e32_vi")> { diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 3af16b32695..5f72f97d9e2 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -207,6 +207,22 @@ class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> { let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } +def SDWA { + // sdwa_sel + int BYTE_0 = 0; + int BYTE_1 = 1; + int BYTE_2 = 2; + int BYTE_3 = 3; + int WORD_0 = 4; + int WORD_1 = 5; + int DWORD = 6; + + // dst_unused + int UNUSED_PAD = 0; + int UNUSED_SEXT = 1; + int UNUSED_PRESERVE = 2; +} + class VOP_SDWAe<VOPProfile P> : Enc64 { bits<8> src0; bits<3> src0_sel; @@ -217,37 +233,74 @@ class VOP_SDWAe<VOPProfile P> : Enc64 { bits<2> dst_unused; bits<1> clamp; - bits<3> SDWA_DWORD = 6; - bits<2> SDWA_UNUSED_PRESERVE = 2; - let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); - let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0}); - let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0}); + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); - let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0}); + let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); - let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0}); + let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); } -class VOP_SDWA <string OpName, VOPProfile P> : - InstSI <P.OutsSDWA, P.InsSDWA, OpName#P.AsmSDWA, []>, - VOP_SDWAe<P> { +class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> : + InstSI <P.OutsSDWA, P.InsSDWA, "", pattern>, + VOP <opName>, + SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>, + MnemonicAlias <opName#"_sdwa", opName> { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.AsmSDWA; + + let Size = 8; let mayLoad = 0; let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; + let hasSideEffects = 0; + let VALU = 1; let SDWA = 1; - let Size = 8; - + let Uses = [EXEC]; + let SubtargetPredicate = isVI; let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst); let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA, AMDGPUAsmVariants.Disable); let DecoderNamespace = "SDWA"; + + VOPProfile Pfl = P; +} + +class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> : + InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, + SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + // Copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AssemblerPredicate = ps.AssemblerPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let DecoderNamespace = ps.DecoderNamespace; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; } class VOP_DPPe<VOPProfile P> : Enc64 { |