diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-20 15:08:34 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-20 15:08:34 +0000 |
commit | 0846c125f98ba6e1013b5acd1b161bd32b395bf8 (patch) | |
tree | 79fc1901c99ea344863b882ce9d34bd7a5d012b3 /llvm/lib | |
parent | 36358cd3ed0f27af941074677e530a8c9aea8802 (diff) | |
download | bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.tar.gz bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.zip |
[AMDGPU] gfx1010 core wave32 changes
Differential Revision: https://reviews.llvm.org/D63204
llvm-svn: 363934
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 34 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPCInstructions.td | 2 |
10 files changed, 56 insertions, 40 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 4ff109b7938..225268d646d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -777,7 +777,7 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureLDSBankCount32, FeatureDLInsts, FeatureNSAEncoding, - FeatureWavefrontSize64, + FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, FeatureScalarFlatScratchInsts, @@ -795,7 +795,7 @@ def FeatureISAVersion10_1_1 : FeatureSet< FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding, - FeatureWavefrontSize64, + FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, FeatureScalarFlatScratchInsts, @@ -812,7 +812,7 @@ def FeatureISAVersion10_1_2 : FeatureSet< FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding, - FeatureWavefrontSize64, + FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, FeatureScalarFlatScratchInsts, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e6f2bd77280..4a844695549 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -50,19 +50,19 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4, def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def AMDGPUIfOp : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>] + [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>] >; def AMDGPUElseOp : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>] + [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>] >; def AMDGPULoopOp : SDTypeProfile<0, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>] + [SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>] >; def AMDGPUIfBreakOp : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>] + [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>] >; //===----------------------------------------------------------------------===// diff --git 
a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 6c1ef983fb7..6b3f68c7395 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -101,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, return addOperand(Inst, MCOperand::createImm(Imm)); } +static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, + uint64_t Addr, const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeBoolReg(Val)); +} + #define DECODE_OPERAND(StaticDecoderName, DecoderName) \ static DecodeStatus StaticDecoderName(MCInst &Inst, \ unsigned Imm, \ diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 99e8da67094..f96b03dcd2c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -946,6 +946,15 @@ public: /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; + const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) + const override { + if (OpNum >= TID.getNumOperands()) + return nullptr; + return RI.getRegClass(TID.OpInfo[OpNum].RegClass); + } + void fixImplicitOperands(MachineInstr &MI) const; }; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ac7777de3fb..ba9cd868643 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -766,6 +766,15 @@ def VOPDstS64orS32 : BoolRC { let PrintMethod = "printVOPDst"; } +// SCSrc_i1 is the operand for pseudo instructions only. +// Boolean immediates shall not be exposed to codegen instructions. 
+def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM_INT32"; + let ParserMatchClass = BoolReg; + let DecoderMethod = "decodeBoolReg"; +} + // ===----------------------------------------------------------------------===// // ExpSrc* Special cases for exp src operands which are printed as // "off" depending on en operand. @@ -804,11 +813,12 @@ def SDWASrc_i16 : SDWASrc<i16>; def SDWASrc_f32 : SDWASrc<f32>; def SDWASrc_f16 : SDWASrc<f16>; -def SDWAVopcDst : VOPDstOperand<SReg_64> { +def SDWAVopcDst : BoolRC { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_SDWA_VOPC_DST"; let EncoderMethod = "getSDWAVopcDstEncoding"; let DecoderMethod = "decodeSDWAVopcDst"; + let PrintMethod = "printVOPDst"; } class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass { @@ -940,11 +950,6 @@ def f32kimm : kimmOperand<i32>; def KImmFP16MatchClass : KImmMatchClass<16>; def f16kimm : kimmOperand<i16>; - -def VOPDstS64 : VOPDstOperand <SReg_64> { - let PrintMethod = "printVOPDst"; -} - class FPInputModsMatchClass <int opSize> : AsmOperandClass { let Name = "RegOrImmWithFP"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithFPInputMods"; @@ -1237,7 +1242,7 @@ class getVALUDstForVT<ValueType VT> { !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>, !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>, !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>, - VOPDstOperand<SReg_64>)))); // else VT == i1 + VOPDstS64orS32)))); // else VT == i1 } // Returns the register class to use for the destination of VOP[12C] @@ -1313,7 +1318,7 @@ class getVOP3SrcForVT<ValueType VT> { VSrc_f64, VSrc_b64), !if(!eq(VT.Value, i1.Value), - SCSrc_i1, + SSrc_i1, !if(isFP, !if(!eq(VT.Value, f16.Value), VSrc_f16, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d8738a8b119..630aeeb8777 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ 
-121,14 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def ENTER_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins i64imm:$src0)> { +def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> { let Defs = [EXEC]; let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; } -def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> { +def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> { let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; @@ -161,11 +161,11 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI < >; def S_ADD_U64_CO_PSEUDO : SPseudoInstSI < - (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) >; def S_SUB_U64_CO_PSEUDO : SPseudoInstSI < - (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) >; } // End usesCustomInserter = 1, Defs = [SCC] @@ -233,30 +233,30 @@ let isTerminator = 1 in { let OtherPredicates = [EnableLateCFGStructurize] in { def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI < (outs), - (ins SReg_64:$vcc, brtarget:$target), + (ins SReg_1:$vcc, brtarget:$target), [(brcond i1:$vcc, bb:$target)]> { let Size = 12; } } def SI_IF: CFPseudoInstSI < - (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), - [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> { + (outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target), + [(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> { let Constraints = ""; let Size = 12; let hasSideEffects = 1; } def SI_ELSE : CFPseudoInstSI < - (outs SReg_64:$dst), - (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> { + (outs SReg_1:$dst), + (ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> { let Size = 12; let hasSideEffects = 1; } 
def SI_LOOP : CFPseudoInstSI < - (outs), (ins SReg_64:$saved, brtarget:$target), - [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> { + (outs), (ins SReg_1:$saved, brtarget:$target), + [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> { let Size = 8; let isBranch = 1; let hasSideEffects = 1; @@ -265,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI < } // End isTerminator = 1 def SI_END_CF : CFPseudoInstSI < - (outs), (ins SReg_64:$saved), - [(int_amdgcn_end_cf i64:$saved)], 1, 1> { + (outs), (ins SReg_1:$saved), [], 1, 1> { let Size = 4; let isAsCheapAsAMove = 1; let isReMaterializable = 1; @@ -276,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI < } def SI_IF_BREAK : CFPseudoInstSI < - (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src), - [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> { + (outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> { let Size = 4; let isAsCheapAsAMove = 1; let isReMaterializable = 1; @@ -303,7 +301,7 @@ multiclass PseudoInstKill <dag ins> { } } -defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>; +defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>; defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>; let Defs = [EXEC,VCC] in @@ -322,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> { } def SI_PS_LIVE : PseudoInstSI < - (outs SReg_64:$dst), (ins), + (outs SReg_1:$dst), (ins), [(set i1:$dst, (int_amdgcn_ps_live))]> { let SALU = 1; } @@ -584,7 +582,7 @@ def : GCNPat< >; def : GCNPat< - (AMDGPUelse i64:$src, bb:$target), + (AMDGPUelse i1:$src, bb:$target), (SI_ELSE $src, $target, 0) >; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 8df3c313616..32e3575baeb 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -733,8 +733,6 @@ def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> { defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; 
-def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>; - //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index e7be776817f..1465f6ba209 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -344,7 +344,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); } // Write out to vcc or arbitrary SGPR and read in from vcc or @@ -358,7 +358,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=* let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. 
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index a15c0571ae7..74d1c636065 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -183,7 +183,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> { let HasModifiers = 0; let HasClamp = 0; let HasOMod = 0; - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } @@ -203,7 +203,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand<VReg_64>; - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp"; } diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index fb4370af024..b3513e383d1 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -56,7 +56,7 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt let Asm32 = "$src0, $src1"; // The destination for 32-bit encoding is implicit. let HasDst32 = 0; - let Outs64 = (outs VOPDstS64:$sdst); + let Outs64 = (outs VOPDstS64orS32:$sdst); list<SchedReadWrite> Schedule = sched; } |