summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-05-02 04:01:39 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-05-02 04:01:39 +0000
commit5cf81677350812f455424b9bdbceb936183bb2c6 (patch)
tree25c6c86916cbdeb43ed07b7a079fc389d63b5363 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
parent9d744bcde993dffe05149be51a9c69178f7a6887 (diff)
downloadbcm5719-llvm-5cf81677350812f455424b9bdbceb936183bb2c6.tar.gz
bcm5719-llvm-5cf81677350812f455424b9bdbceb936183bb2c6.zip
[AMDGPU] gfx1010 allows VOP3 to have a literal
Differential Revision: https://reviews.llvm.org/D61413 llvm-svn: 359756
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp74
1 files changed, 64 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b4f0037682d..fc4cd6f1e0b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -236,7 +236,7 @@ public:
}
bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
- return isRegClass(RCID) || isInlinableImm(type);
+ return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
}
bool isRegOrImmWithInt16InputMods() const {
@@ -461,7 +461,7 @@ public:
}
bool isVSrcB32() const {
- return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
+ return isVCSrcF32() || isLiteralImm(MVT::i32);
}
bool isVSrcB64() const {
@@ -473,12 +473,11 @@ public:
}
bool isVSrcV2B16() const {
- llvm_unreachable("cannot happen");
- return isVSrcB16();
+ return isVSrcB16() || isLiteralImm(MVT::v2i16);
}
bool isVSrcF32() const {
- return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
+ return isVCSrcF32() || isLiteralImm(MVT::f32);
}
bool isVSrcF64() const {
@@ -490,8 +489,7 @@ public:
}
bool isVSrcV2F16() const {
- llvm_unreachable("cannot happen");
- return isVSrcF16();
+ return isVSrcF16() || isLiteralImm(MVT::v2f16);
}
bool isKImmFP32() const {
@@ -1145,6 +1143,7 @@ private:
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst);
bool validateLdsDirect(const MCInst &Inst);
+ bool validateVOP3Literal(const MCInst &Inst) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1287,6 +1286,8 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
@@ -1419,8 +1420,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
return false;
}
+ // We allow fp literals with f16x2 operands assuming that the specified
+ // literal goes into the lower half and the upper half is zero. We also
+ // require that the literal may be losslesly converted to f16.
+ MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
+ (type == MVT::v2i16)? MVT::i16 : type;
+
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
- return canLosslesslyConvertToFPType(FPLiteral, type);
+ return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
@@ -1535,7 +1542,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -1562,6 +1571,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -2419,7 +2430,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
} else {
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
@@ -2919,6 +2932,42 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
return NumLiterals <= 1;
}
+// VOP3 literal is only allowed in GFX10+ and only one can be used
+bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
+ unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ unsigned NumLiterals = 0;
+ uint32_t LiteralValue;
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
+ continue;
+
+ if (!isInlineConstant(Inst, OpIdx)) {
+ uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ if (NumLiterals == 0 || LiteralValue != Value) {
+ LiteralValue = Value;
+ ++NumLiterals;
+ }
+ }
+ }
+
+ return !NumLiterals ||
+ (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
+}
+
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
if (!validateLdsDirect(Inst)) {
@@ -2931,6 +2980,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"only one literal operand is allowed");
return false;
}
+ if (!validateVOP3Literal(Inst)) {
+ Error(IDLoc,
+ "invalid literal operand");
+ return false;
+ }
if (!validateConstantBusLimitations(Inst)) {
Error(IDLoc,
"invalid operand (violates constant bus restrictions)");
OpenPOWER on IntegriCloud