diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-05-02 04:01:39 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-05-02 04:01:39 +0000 |
commit | 5cf81677350812f455424b9bdbceb936183bb2c6 (patch) | |
tree | 25c6c86916cbdeb43ed07b7a079fc389d63b5363 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | |
parent | 9d744bcde993dffe05149be51a9c69178f7a6887 (diff) | |
download | bcm5719-llvm-5cf81677350812f455424b9bdbceb936183bb2c6.tar.gz bcm5719-llvm-5cf81677350812f455424b9bdbceb936183bb2c6.zip |
[AMDGPU] gfx1010 allows VOP3 to have a literal
Differential Revision: https://reviews.llvm.org/D61413
llvm-svn: 359756
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 74 |
1 files changed, 64 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b4f0037682d..fc4cd6f1e0b 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -236,7 +236,7 @@ public: } bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { - return isRegClass(RCID) || isInlinableImm(type); + return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); } bool isRegOrImmWithInt16InputMods() const { @@ -461,7 +461,7 @@ public: } bool isVSrcB32() const { - return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); + return isVCSrcF32() || isLiteralImm(MVT::i32); } bool isVSrcB64() const { @@ -473,12 +473,11 @@ public: } bool isVSrcV2B16() const { - llvm_unreachable("cannot happen"); - return isVSrcB16(); + return isVSrcB16() || isLiteralImm(MVT::v2i16); } bool isVSrcF32() const { - return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); + return isVCSrcF32() || isLiteralImm(MVT::f32); } bool isVSrcF64() const { @@ -490,8 +489,7 @@ public: } bool isVSrcV2F16() const { - llvm_unreachable("cannot happen"); - return isVSrcF16(); + return isVSrcF16() || isLiteralImm(MVT::v2f16); } bool isKImmFP32() const { @@ -1145,6 +1143,7 @@ private: bool validateMIMGD16(const MCInst &Inst); bool validateMIMGDim(const MCInst &Inst); bool validateLdsDirect(const MCInst &Inst); + bool validateVOP3Literal(const MCInst &Inst) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -1287,6 +1286,8 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: return &APFloat::IEEEhalf(); default: llvm_unreachable("unsupported fp type"); @@ -1419,8 +1420,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { return false; } + // We allow fp literals with f16x2 operands assuming that the specified + // literal goes into the lower half and the upper half is zero. We also + // require that the literal may be losslesly converted to f16. + MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : + (type == MVT::v2i16)? MVT::i16 : type; + APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); - return canLosslesslyConvertToFPType(FPLiteral, type); + return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); } bool AMDGPUOperand::isRegClass(unsigned RCID) const { @@ -1535,7 +1542,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -1562,6 +1571,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: if (isSafeTruncation(Val, 32) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -2419,7 +2430,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, case 2: { const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || - OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); } else { return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); @@ -2919,6 +2932,42 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { return NumLiterals <= 1; } +// VOP3 literal is only allowed in GFX10+ and only one can be used +bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { + unsigned Opcode = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + unsigned NumLiterals = 0; + uint32_t LiteralValue; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) + continue; + + if (!isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast<uint32_t>(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } + } + + return !NumLiterals || + (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc) { if (!validateLdsDirect(Inst)) { @@ -2931,6 +2980,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "only one literal operand is allowed"); return false; } + if (!validateVOP3Literal(Inst)) { + Error(IDLoc, + "invalid literal operand"); + return false; + } if (!validateConstantBusLimitations(Inst)) { Error(IDLoc, "invalid operand (violates constant bus restrictions)"); |