diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 117 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 |
7 files changed, 110 insertions, 47 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0dff52668c6..74f34297697 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -415,6 +415,11 @@ public: return isSSrcF16(); } + bool isSSrcOrLdsB32() const { + return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || + isLiteralImm(MVT::i32) || isExpr(); + } + bool isVCSrcB32() const { return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); } @@ -2477,6 +2482,73 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { return true; } +static bool IsRevOpcode(const unsigned Opcode) +{ + switch (Opcode) { + case AMDGPU::V_SUBREV_F32_e32: + case AMDGPU::V_SUBREV_F32_e64: + case AMDGPU::V_SUBREV_F32_e32_si: + case AMDGPU::V_SUBREV_F32_e32_vi: + case AMDGPU::V_SUBREV_F32_e64_si: + case AMDGPU::V_SUBREV_F32_e64_vi: + case AMDGPU::V_SUBREV_I32_e32: + case AMDGPU::V_SUBREV_I32_e64: + case AMDGPU::V_SUBREV_I32_e32_si: + case AMDGPU::V_SUBREV_I32_e64_si: + case AMDGPU::V_SUBBREV_U32_e32: + case AMDGPU::V_SUBBREV_U32_e64: + case AMDGPU::V_SUBBREV_U32_e32_si: + case AMDGPU::V_SUBBREV_U32_e32_vi: + case AMDGPU::V_SUBBREV_U32_e64_si: + case AMDGPU::V_SUBBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_U32_e32: + case AMDGPU::V_SUBREV_U32_e64: + case AMDGPU::V_SUBREV_U32_e32_gfx9: + case AMDGPU::V_SUBREV_U32_e32_vi: + case AMDGPU::V_SUBREV_U32_e64_gfx9: + case AMDGPU::V_SUBREV_U32_e64_vi: + case AMDGPU::V_SUBREV_F16_e32: + case AMDGPU::V_SUBREV_F16_e64: + case AMDGPU::V_SUBREV_F16_e32_vi: + case AMDGPU::V_SUBREV_F16_e64_vi: + case AMDGPU::V_SUBREV_U16_e32: + case AMDGPU::V_SUBREV_U16_e64: + case AMDGPU::V_SUBREV_U16_e32_vi: + case AMDGPU::V_SUBREV_U16_e64_vi: + case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: + case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: + case AMDGPU::V_LSHLREV_B32_e32_si: + case AMDGPU::V_LSHLREV_B32_e64_si: + case AMDGPU::V_LSHLREV_B16_e32_vi: + case AMDGPU::V_LSHLREV_B16_e64_vi: + case AMDGPU::V_LSHLREV_B32_e32_vi: + case AMDGPU::V_LSHLREV_B32_e64_vi: + case AMDGPU::V_LSHLREV_B64_vi: + case AMDGPU::V_LSHRREV_B32_e32_si: + case AMDGPU::V_LSHRREV_B32_e64_si: + case AMDGPU::V_LSHRREV_B16_e32_vi: + case AMDGPU::V_LSHRREV_B16_e64_vi: + case AMDGPU::V_LSHRREV_B32_e32_vi: + case AMDGPU::V_LSHRREV_B32_e64_vi: + case AMDGPU::V_LSHRREV_B64_vi: + case AMDGPU::V_ASHRREV_I32_e64_si: + case AMDGPU::V_ASHRREV_I32_e32_si: + case AMDGPU::V_ASHRREV_I16_e32_vi: + case AMDGPU::V_ASHRREV_I16_e64_vi: + case AMDGPU::V_ASHRREV_I32_e32_vi: + case AMDGPU::V_ASHRREV_I32_e64_vi: + case AMDGPU::V_ASHRREV_I64_vi: + case AMDGPU::V_PK_LSHLREV_B16_vi: + case AMDGPU::V_PK_LSHRREV_B16_vi: + case AMDGPU::V_PK_ASHRREV_I16_vi: + return true; + default: + return false; + } +} + bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { using namespace SIInstrFlags; @@ -2511,50 +2583,7 @@ bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { return true; // lds_direct is specified as src0. Check additional limitations. - - // FIXME: This is a workaround for bug 37943 - // which allows 64-bit VOP3 opcodes use 32-bit operands. - if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4) - return false; - - // Documentation does not disable lds_direct for SDWA, but SP3 assembler does. - // FIXME: This inconsistence needs to be investigated further. - if (Desc.TSFlags & SIInstrFlags::SDWA) - return false; - - // The following opcodes do not accept lds_direct which is explicitly stated - // in AMD documentation. However SP3 disables lds_direct for most other 'rev' - // opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32). - // FIXME: This inconsistence needs to be investigated further. - switch (Opcode) { - case AMDGPU::V_LSHLREV_B32_e32_si: - case AMDGPU::V_LSHLREV_B32_e64_si: - case AMDGPU::V_LSHLREV_B16_e32_vi: - case AMDGPU::V_LSHLREV_B16_e64_vi: - case AMDGPU::V_LSHLREV_B32_e32_vi: - case AMDGPU::V_LSHLREV_B32_e64_vi: - case AMDGPU::V_LSHLREV_B64_vi: - case AMDGPU::V_LSHRREV_B32_e32_si: - case AMDGPU::V_LSHRREV_B32_e64_si: - case AMDGPU::V_LSHRREV_B16_e32_vi: - case AMDGPU::V_LSHRREV_B16_e64_vi: - case AMDGPU::V_LSHRREV_B32_e32_vi: - case AMDGPU::V_LSHRREV_B32_e64_vi: - case AMDGPU::V_LSHRREV_B64_vi: - case AMDGPU::V_ASHRREV_I32_e64_si: - case AMDGPU::V_ASHRREV_I32_e32_si: - case AMDGPU::V_ASHRREV_I16_e32_vi: - case AMDGPU::V_ASHRREV_I16_e64_vi: - case AMDGPU::V_ASHRREV_I32_e32_vi: - case AMDGPU::V_ASHRREV_I32_e64_vi: - case AMDGPU::V_ASHRREV_I64_vi: - case AMDGPU::V_PK_LSHLREV_B16_vi: - case AMDGPU::V_PK_LSHRREV_B16_vi: - case AMDGPU::V_PK_ASHRREV_I16_vi: - return false; - default: - return true; - } + return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); } bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9ae1bcd44ec..0acd30f5408 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -97,6 +97,7 @@ static DecodeStatus StaticDecoderName(MCInst &Inst, \ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VRegOrLds_32) DECODE_OPERAND_REG(VS_32) DECODE_OPERAND_REG(VS_64) DECODE_OPERAND_REG(VS_128) @@ -108,6 +109,7 @@ DECODE_OPERAND_REG(VReg_128) DECODE_OPERAND_REG(SReg_32) DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) DECODE_OPERAND_REG(SReg_32_XEXEC_HI) +DECODE_OPERAND_REG(SRegOrLds_32) DECODE_OPERAND_REG(SReg_64) DECODE_OPERAND_REG(SReg_64_XEXEC) DECODE_OPERAND_REG(SReg_128) @@ -469,6 +471,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const { return createRegOperand(AMDGPU::VGPR_32RegClassID, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const { + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { return createRegOperand(AMDGPU::VReg_64RegClassID, Val); } @@ -500,6 +506,13 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI( return decodeOperand_SReg_32(Val); } +MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const { + // table-gen generated disassembler doesn't care about operand types + // leaving only registry class so SSrc_32 operand turns into SReg_32 + // and therefore we accept immediates and literals here as well + return decodeSrcOp(OPW32, Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { return decodeSrcOp(OPW64, Val); } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 4e1c75ff57b..9d7fedb97c2 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -71,6 +71,8 @@ public: DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; + MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const; + MCOperand decodeOperand_VS_32(unsigned Val) const; MCOperand decodeOperand_VS_64(unsigned Val) const; MCOperand decodeOperand_VS_128(unsigned Val) const; @@ -84,6 +86,7 @@ public: MCOperand decodeOperand_SReg_32(unsigned Val) const; MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const; + MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) const; MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 84751d1cbfe..5f04a8c816c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -442,6 +442,11 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, let AllocationPriority = 7; } +def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 8; @@ -511,6 +516,11 @@ def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, let AllocationPriority = 12; } +def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, + (add VGPR_32, LDS_DIRECT_CLASS)> { + let isAllocatable = 0; +} + // Register class for all vector registers (VGPRs + Interploation Registers) def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> { let Size = 64; @@ -631,6 +641,12 @@ multiclass RegInlineOperand <string rc, string MatchName> defm SSrc : RegImmOperand<"SReg", "SSrc">; +def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM_INT32"; + let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">; +} + //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e1727338599..96128749081 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -802,9 +802,11 @@ unsigned getRegBitWidth(unsigned RCID) { switch (RCID) { case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: + case AMDGPU::VRegOrLds_32RegClassID: case AMDGPU::VS_32RegClassID: case AMDGPU::SReg_32RegClassID: case AMDGPU::SReg_32_XM0RegClassID: + case AMDGPU::SRegOrLds_32RegClassID: return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 15006155d9b..aafe0339acf 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -142,7 +142,7 @@ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; // TODO: Make profile for this, there is VOP3 encoding also def V_READFIRSTLANE_B32 : InstSI <(outs SReg_32:$vdst), - (ins VGPR_32:$src0), + (ins VRegOrLds_32:$src0), "v_readfirstlane_b32 $vdst, $src0", [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>, Enc32 { diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ee165515ce7..9a0a81c97ef 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -360,7 +360,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Outs32 = (outs SReg_32:$vdst); let Outs64 = Outs32; - let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1); + let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; @@ -765,7 +765,7 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>; defm V_READLANE_B32 : VOP2_Real_si <0x01>; -let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { defm V_WRITELANE_B32 : VOP2_Real_si <0x02>; } |