author     Sam Kolton <Sam.Kolton@amd.com>    2016-09-09 14:44:04 +0000
committer  Sam Kolton <Sam.Kolton@amd.com>    2016-09-09 14:44:04 +0000
commit     1eeb11bfd40e7478d09628ff9ddb561a620fbf8e (patch)
tree       cb4027eb2f76a4503cc085887ca85c155faac0c0 /llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
parent     cd574a5e462ae438b9ab2c24244b7e8d5e6a9337 (diff)
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously, the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in the instruction "v_fract_f64 v[0:1], 0.5", the literal 0.5 was encoded as the 32-bit literal 0x3f000000, which is incorrect and is interpreted as 3.0517578125E-5 instead of 0.5. The correct encoding is the inline constant 240 (optimal) or, at minimum, the 32-bit literal 0x3FE00000.
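To make the misinterpretation concrete, here is a small standalone check (illustration only, not part of this patch; it relies on the encoding rule used throughout this change, namely that a 32-bit literal supplies the high 32 bits of a 64-bit operand and the low 32 bits are zero-filled):
'''
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D;

  // Old (wrong) encoding: the f32 bit pattern of 0.5 (0x3f000000) lands in
  // the high half of the 64-bit operand; the low half is zero-filled.
  uint64_t WrongBits = static_cast<uint64_t>(0x3f000000u) << 32;
  std::memcpy(&D, &WrongBits, sizeof(D));
  std::printf("%.10E\n", D); // prints 3.0517578125E-05, not 0.5

  // Corrected encoding: the high 32 bits of the f64 pattern of 0.5.
  uint64_t FixedBits = static_cast<uint64_t>(0x3FE00000u) << 32;
  std::memcpy(&D, &FixedBits, sizeof(D));
  std::printf("%g\n", D); // prints 0.5
  return 0;
}
'''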
With this change, the way immediate literals are parsed changes: all literals are always parsed as 64-bit values, either integer or floating-point. We then convert each parsed literal to the correct form, based on the kind of literal that was parsed (floating-point or integer/binary) and on the type of the expected instruction operand (an f32/f64 or b32/b64 instruction).
Here are the rules for how we convert literals (a condensed code sketch follows the list):
- We parsed an fp literal:
  - Instruction expects a 64-bit operand:
    - If the parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5),
      then we do nothing with this literal.
    - Else, if the literal is not inlinable but the instruction requires it to be inlined
      (e.g. this is an e64 encoding, v_fract_f64_e64 v[0:1], 1.5),
      report an error.
    - Else, the literal is not inlinable but we can encode it as an additional 32-bit literal constant:
      - If the instruction expects an fp operand type (f64):
        - Check whether the low 32 bits of the literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5).
          If so, do nothing.
        - Else (e.g. v_fract_f64 v[0:1], 3.1415),
          report a warning that the low 32 bits will be set to zeroes and precision will be lost,
          then set the low 32 bits of the literal to zeroes.
      - If the instruction expects an integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5),
        report an error, as it is unclear how to encode this literal.
  - Instruction expects a 32-bit operand:
    - Convert the parsed 64-bit fp literal to 32-bit fp, allowing loss of precision but not
      overflow or underflow.
    - If we are required to inline the literal (e.g. v_trunc_f32_e64 v0, 0.5):
      - If it is inlinable, do nothing.
      - Else, report an error.
    - Else, do nothing; we can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0).
- We parsed an integer (binary) literal:
  - If the literal is inlinable (e.g. v_trunc_f32_e32 v0, 35), do nothing.
  - Else, if we are required to inline the literal (e.g. v_trunc_f32_e64 v0, 35), report an error.
  - Else, the literal is not inlinable and we are not required to inline it:
    - If the high 32 bits of the literal are zeroes, or are the sign-extension of bit 31,
      do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef).
    - Else, report an error (e.g. v_trunc_f32 v0, 0x123456789abcdef0).
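The following is a rough sketch condensing the fp-literal rules for a 32-bit operand (my restatement, not the patch's actual code; isInlinable32() below is a stand-in modeled on the check that the old isInlinableImm() performed):
'''
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <cstring>

// Stand-in for the real inline-constant predicate: integers -16..64,
// or one of the special fp constants (as in the old isInlinableImm()).
static bool isInlinable32(int32_t Val) {
  float F;
  std::memcpy(&F, &Val, sizeof(F));
  return (Val <= 64 && Val >= -16) ||
         F == 0.0f || F == 0.5f || F == -0.5f || F == 1.0f || F == -1.0f ||
         F == 2.0f || F == -2.0f || F == 4.0f || F == -4.0f;
}

// Returns false if the fp literal cannot be encoded for a 32-bit operand;
// on success, Encoded holds the 32-bit fp bit pattern to emit.
static bool convertFPLiteralTo32(uint64_t F64Bits, bool MustInline,
                                 uint32_t &Encoded) {
  bool Lost;
  llvm::APFloat F(llvm::APFloat::IEEEdouble, llvm::APInt(64, F64Bits));
  llvm::APFloat::opStatus St = F.convert(
      llvm::APFloat::IEEEsingle, llvm::APFloat::rmNearestTiesToEven, &Lost);
  // Loss of precision is allowed; overflow or underflow is not.
  if (St & (llvm::APFloat::opOverflow | llvm::APFloat::opUnderflow))
    return false;
  Encoded = static_cast<uint32_t>(F.bitcastToAPInt().getZExtValue());
  // e64 encodings have no literal slot, so the value must be inlinable.
  if (MustInline)
    return isInlinable32(static_cast<int32_t>(Encoded));
  return true; // any other 32-bit fp literal can be emitted as-is
}
'''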
For this change we need to know the operand types of each instruction (whether they are f32/f64 or b32/b64). I added several new register operand classes (extending the previous register operands) and set their operand types to the corresponding values:
'''
enum OperandType {
  OPERAND_REG_IMM32_INT,
  OPERAND_REG_IMM32_FP,
  OPERAND_REG_INLINE_C_INT,
  OPERAND_REG_INLINE_C_FP,
};
'''
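For illustration only, a minimal sketch of the kind of queries these operand types enable; the helper names here are hypothetical (the patch itself consults MCInstrDesc through helpers such as AMDGPU::isSISrcOperand() and AMDGPU::getRegOperandSize(), as the diff below shows):
'''
// Standalone mirror of the operand types above, for a self-contained example.
enum OperandType {
  OPERAND_REG_IMM32_INT,
  OPERAND_REG_IMM32_FP,
  OPERAND_REG_INLINE_C_INT,
  OPERAND_REG_INLINE_C_FP,
};

// Hypothetical helper: INLINE_C operands have no literal slot, so any
// literal must be encodable as an inline constant.
inline bool mustInline(OperandType T) {
  return T == OPERAND_REG_INLINE_C_INT || T == OPERAND_REG_INLINE_C_FP;
}

// Hypothetical helper: fp-typed operands select the fp conversion rules
// above, int-typed operands select the integer rules.
inline bool isFPOperand(OperandType T) {
  return T == OPERAND_REG_IMM32_FP || T == OPERAND_REG_INLINE_C_FP;
}
'''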
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- The LLVM-generated assembler-matcher code tries to select the e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp  507
1 file changed, 365 insertions(+), 142 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2f0e6027c7f..ae09831b916 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -43,10 +44,15 @@ using namespace llvm;
 
 namespace {
 
+class AMDGPUAsmParser;
 struct OptionalOperand;
 
 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
 
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
 class AMDGPUOperand : public MCParsedAsmOperand {
   enum KindTy {
     Token,
@@ -56,9 +62,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   } Kind;
 
   SMLoc StartLoc, EndLoc;
+  const AMDGPUAsmParser *AsmParser;
 
 public:
-  AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+  AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
 
   typedef std::unique_ptr<AMDGPUOperand> Ptr;
 
@@ -143,8 +151,6 @@ public:
   };
 
   struct RegOp {
-    const MCRegisterInfo *TRI;
-    const MCSubtargetInfo *STI;
     unsigned RegNo;
     bool IsForcedVOP3;
     Modifiers Mods;
@@ -175,20 +181,8 @@ public:
     return Kind == Immediate;
   }
 
-  bool isInlinableImm() const {
-    if (!isImmTy(ImmTyNone)) {
-      // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
-      return false;
-    }
-    // TODO: We should avoid using host float here. It would be better to
-    // check the float bit values which is what a few other places do.
-    // We've had bot failures before due to weird NaN support on mips hosts.
-    const float F = BitsToFloat(Imm.Val);
-    // TODO: Add 1/(2*pi) for VI
-    return (Imm.Val <= 64 && Imm.Val >= -16) ||
-           (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
-            F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
-  }
+  bool isInlinableImm(MVT type) const;
+  bool isLiteralImm(MVT type) const;
 
   bool isRegKind() const {
     return Kind == Register;
@@ -198,8 +192,24 @@ public:
     return isRegKind() && !Reg.Mods.hasModifiers();
   }
 
-  bool isRegOrImmWithInputMods() const {
-    return isRegKind() || isInlinableImm();
+  bool isRegOrImmWithInputMods(MVT type) const {
+    return isRegKind() || isInlinableImm(type);
+  }
+
+  bool isRegOrImmWithInt32InputMods() const {
+    return isRegOrImmWithInputMods(MVT::i32);
+  }
+
+  bool isRegOrImmWithInt64InputMods() const {
+    return isRegOrImmWithInputMods(MVT::i64);
+  }
+
+  bool isRegOrImmWithFP32InputMods() const {
+    return isRegOrImmWithInputMods(MVT::f32);
+  }
+
+  bool isRegOrImmWithFP64InputMods() const {
+    return isRegOrImmWithInputMods(MVT::f64);
   }
 
   bool isImmTy(ImmTy ImmT) const {
@@ -243,47 +253,76 @@ public:
     return isReg() || isImm();
   }
 
-  bool isRegClass(unsigned RCID) const {
-    return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg());
+  bool isRegClass(unsigned RCID) const;
+
+  bool isSCSrcB32() const {
+    return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i32);
+  }
+
+  bool isSCSrcB64() const {
+    return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64);
   }
 
-  bool isSCSrc32() const {
-    return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID);
+  bool isSCSrcF32() const {
+    return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32);
   }
 
-  bool isSCSrc64() const {
-    return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID);
+  bool isSCSrcF64() const {
+    return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::f64);
   }
 
-  bool isSSrc32() const {
-    return isImm() || isSCSrc32() || isExpr();
+  bool isSSrcB32() const {
+    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
   }
 
-  bool isSSrc64() const {
+  bool isSSrcB64() const {
     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
     // See isVSrc64().
-    return isImm() || isSCSrc64();
+    return isSCSrcB64() || isLiteralImm(MVT::i64);
   }
 
-  bool isVCSrc32() const {
-    return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID);
+  bool isSSrcF32() const {
+    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
   }
 
-  bool isVCSrc64() const {
-    return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID);
+  bool isSSrcF64() const {
+    return isSCSrcB64() || isLiteralImm(MVT::f64);
   }
 
-  bool isVSrc32() const {
-    return isImm() || isVCSrc32();
+  bool isVCSrcB32() const {
+    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32);
   }
 
-  bool isVSrc64() const {
-    // TODO: Check if the 64-bit value (coming from assembly source) can be
-    // narrowed to 32 bits (in the instruction stream). That require knowledge
-    // of instruction type (unsigned/signed, floating or "untyped"/B64),
-    // see [AMD GCN3 ISA 6.3.1].
-    // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns?
-    return isImm() || isVCSrc64();
+  bool isVCSrcB64() const {
+    return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64);
+  }
+
+  bool isVCSrcF32() const {
+    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32);
+  }
+
+  bool isVCSrcF64() const {
+    return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64);
+  }
+
+  bool isVSrcB32() const {
+    return isVCSrcF32() || isLiteralImm(MVT::i32);
+  }
+
+  bool isVSrcB64() const {
+    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }
+
+  bool isVSrcF32() const {
+    return isVCSrcF32() || isLiteralImm(MVT::f32);
+  }
+
+  bool isVSrcF64() const {
+    return isVCSrcF64() || isLiteralImm(MVT::f64);
+  }
+
+  bool isKImmFP32() const {
+    return isLiteralImm(MVT::f32);
   }
 
   bool isMem() const override {
@@ -368,29 +407,13 @@ public:
     return getModifiers().hasIntModifiers();
   }
 
-  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const {
-    if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) {
-      // Apply modifiers to immediate value
-      int64_t Val = Imm.Val;
-      bool Negate = Imm.Mods.Neg; // Only negate can get here
-      if (Imm.IsFPImm) {
-        APFloat F(BitsToFloat(Val));
-        if (Negate) {
-          F.changeSign();
-        }
-        Val = F.bitcastToAPInt().getZExtValue();
-      } else {
-        Val = Negate ? -Val : Val;
-      }
-      Inst.addOperand(MCOperand::createImm(Val));
-    } else {
-      Inst.addOperand(MCOperand::createImm(getImm()));
-    }
-  }
+  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
 
-  void addRegOperands(MCInst &Inst, unsigned N) const {
-    Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
-  }
+  void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+
+  void addKImmFP32Operands(MCInst &Inst, unsigned N) const;
+
+  void addRegOperands(MCInst &Inst, unsigned N) const;
 
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
     if (isRegKind())
@@ -484,10 +507,11 @@ public:
     }
   }
 
-  static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc,
+  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
+                                      int64_t Val, SMLoc Loc,
                                       enum ImmTy Type = ImmTyNone,
                                       bool IsFPImm = false) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
     Op->Imm.Val = Val;
     Op->Imm.IsFPImm = IsFPImm;
     Op->Imm.Type = Type;
@@ -497,9 +521,10 @@ public:
     return Op;
   }
 
-  static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc,
+  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
+                                        StringRef Str, SMLoc Loc,
                                         bool HasExplicitEncodingSize = true) {
-    auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
     Res->Tok.Data = Str.data();
     Res->Tok.Length = Str.size();
     Res->StartLoc = Loc;
@@ -507,15 +532,12 @@ public:
     return Res;
   }
 
-  static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S,
+  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
+                                      unsigned RegNo, SMLoc S,
                                       SMLoc E,
-                                      const MCRegisterInfo *TRI,
-                                      const MCSubtargetInfo *STI,
                                       bool ForceVOP3) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
     Op->Reg.RegNo = RegNo;
-    Op->Reg.TRI = TRI;
-    Op->Reg.STI = STI;
     Op->Reg.Mods = {false, false, false};
     Op->Reg.IsForcedVOP3 = ForceVOP3;
     Op->StartLoc = S;
@@ -523,8 +545,9 @@ public:
     return Op;
   }
 
-  static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
+                                       const class MCExpr *Expr, SMLoc S) {
+    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
     Op->Expr = Expr;
     Op->StartLoc = S;
     Op->EndLoc = S;
@@ -537,6 +560,10 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
   return OS;
 }
 
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
 class AMDGPUAsmParser : public MCTargetAsmParser {
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
@@ -545,22 +572,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ForcedDPP;
   bool ForcedSDWA;
 
-  bool isSI() const {
-    return AMDGPU::isSI(getSTI());
-  }
-
-  bool isCI() const {
-    return AMDGPU::isCI(getSTI());
-  }
-
-  bool isVI() const {
-    return AMDGPU::isVI(getSTI());
-  }
-
-  bool hasSGPR102_SGPR103() const {
-    return !isVI();
-  }
-
   /// @name Auto-generated Match Functions
   /// {
 
@@ -624,11 +635,37 @@ public:
     }
   }
 
+  bool isSI() const {
+    return AMDGPU::isSI(getSTI());
+  }
+
+  bool isCI() const {
+    return AMDGPU::isCI(getSTI());
+  }
+
+  bool isVI() const {
+    return AMDGPU::isVI(getSTI());
+  }
+
+  bool hasSGPR102_SGPR103() const {
+    return !isVI();
+  }
+
   AMDGPUTargetStreamer &getTargetStreamer() {
     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
     return static_cast<AMDGPUTargetStreamer &>(TS);
   }
 
+  const MCRegisterInfo *getMRI() const {
+    // We need this const_cast because for some reason getContext() is not const
+    // in MCAsmParser.
+    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
+  }
+
+  const MCInstrInfo *getMII() const {
+    return &MII;
+  }
+
   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -652,6 +689,7 @@ public:
   StringRef parseMnemonicSuffix(StringRef Name);
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                         SMLoc NameLoc, OperandVector &Operands) override;
+  //bool ProcessInstruction(MCInst &Inst);
 
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
@@ -738,6 +776,202 @@ struct OptionalOperand {
 
 }
 
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isInlinableImm(MVT type) const {
+  if (!isImmTy(ImmTyNone)) {
+    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
+    return false;
+  }
+  // TODO: We should avoid using host float here. It would be better to
+  // check the float bit values which is what a few other places do.
+  // We've had bot failures before due to weird NaN support on mips hosts.
+
+  APInt Literal(64, Imm.Val);
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+      return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI());
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle,
+                                                   APFloat::rmNearestTiesToEven,
+                                                   &lost);
+      // We allow precision lost but not overflow or underflow
+      if (status != APFloat::opOK &&
+          lost &&
+          ((status & APFloat::opOverflow)  != 0 ||
+           (status & APFloat::opUnderflow) != 0)) {
+        return false;
+      }
+      // Check if single precision literal is inlinable
+      return AMDGPU::isInlinableLiteral32(
+               static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+               AsmParser->isVI());
+    }
+  } else { // We got int literal token
+    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+      return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI());
+    } else { // Expected 32-bit operand
+      return AMDGPU::isInlinableLiteral32(
+               static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
+               AsmParser->isVI());
+    }
+  }
+  return false;
+}
+
+bool AMDGPUOperand::isLiteralImm(MVT type) const {
+  // Check that this imediate can be added as literal
+  if (!isImmTy(ImmTyNone)) {
+    return false;
+  }
+
+  APInt Literal(64, Imm.Val);
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (type == MVT::f64) { // Expected 64-bit fp operand
+      // We would set low 64-bits of literal to zeroes but we accept this literals
+      return true;
+    } else if (type == MVT::i64) { // Expected 64-bit int operand
+      // We don't allow fp literals in 64-bit integer instructions. It is
+      // unclear how we should encode them.
+      return false;
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle,
+                                                   APFloat::rmNearestTiesToEven,
+                                                   &lost);
+      // We allow precision lost but not overflow or underflow
+      if (status != APFloat::opOK &&
+          lost &&
+          ((status & APFloat::opOverflow)  != 0 ||
+           (status & APFloat::opUnderflow) != 0)) {
+        return false;
+      }
+      return true;
+    }
+  } else { // We got int literal token
+    APInt HiBits = Literal.getHiBits(32);
+    if (HiBits == 0xffffffff &&
+        (*Literal.getLoBits(32).getRawData() & 0x80000000) != 0) {
+      // If high 32 bits aren't zeroes then they all should be ones and 32nd
+      // bit should be set. So that this 64-bit literal is sign-extension of
+      // 32-bit value.
+      return true;
+    } else if (HiBits == 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool AMDGPUOperand::isRegClass(unsigned RCID) const {
+  return isReg() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
+}
+
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
+  int64_t Val = Imm.Val;
+  if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
+    // Apply modifiers to immediate value. Only negate can get here
+    if (Imm.IsFPImm) {
+      APFloat F(BitsToDouble(Val));
+      F.changeSign();
+      Val = F.bitcastToAPInt().getZExtValue();
+    } else {
+      Val = -Val;
+    }
+  }
+
+  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) {
+    addLiteralImmOperand(Inst, Val);
+  } else {
+    Inst.addOperand(MCOperand::createImm(Val));
+  }
+}
+
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
+  auto OpNum = Inst.getNumOperands();
+  // Check that this operand accepts literals
+  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
+
+  APInt Literal(64, Val);
+  auto OpSize = AMDGPU::getRegOperandSize(AsmParser->getMRI(), InstDesc, OpNum); // expected operand size
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (OpSize == 8) { // Expected 64-bit operand
+      // Check if literal is inlinable
+      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
+      } else if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
+        // For fp operands we check if low 32 bits are zeros
+        if (Literal.getLoBits(32) != 0) {
+          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
+                            "Can't encode literal as exact 64-bit"
+                            " floating-point operand. Low 32-bits will be"
+                            " set to zero");
+        }
+        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+      } else {
+        // We don't allow fp literals in 64-bit integer instructions. It is
+        // unclear how we should encode them. This case should be checked earlier
+        // in predicate methods (isLiteralImm())
+        llvm_unreachable("fp literal in 64-bit integer instruction.");
+      }
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+      // We allow precision lost but not overflow or underflow. This should be
+      // checked earlier in isLiteralImm()
+      Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+    }
+  } else { // We got int literal token
+    if (OpSize == 8) { // Expected 64-bit operand
+      auto LiteralVal = Literal.getZExtValue();
+      if (AMDGPU::isInlinableLiteral64(LiteralVal, AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(LiteralVal));
+        return;
+      }
+    } else { // Expected 32-bit operand
+      auto LiteralVal = static_cast<int32_t>(Literal.getLoBits(32).getZExtValue());
+      if (AMDGPU::isInlinableLiteral32(LiteralVal, AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(LiteralVal));
+        return;
      }
+    }
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+  }
+}
+
+void AMDGPUOperand::addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+  APInt Literal(64, Imm.Val);
+  if (Imm.IsFPImm) { // We got fp literal
+    bool lost;
+    APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+    FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+    Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+  } else { // We got int literal token
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+  }
+}
+
+void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
+  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
+}
+
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
   if (Is == IS_VGPR) {
     switch (RegWidth) {
@@ -952,20 +1186,18 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
   const auto &Tok = Parser.getTok();
   SMLoc StartLoc = Tok.getLoc();
   SMLoc EndLoc = Tok.getEndLoc();
-  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-
   RegisterKind RegKind;
   unsigned Reg, RegNum, RegWidth;
 
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
     return nullptr;
   }
-  return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc,
-                                  TRI, &getSTI(), false);
+  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
 }
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+  // TODO: add syntactic sugar for 1/(2*PI)
   bool Minus = false;
   if (getLexer().getKind() == AsmToken::Minus) {
     Minus = true;
@@ -978,28 +1210,21 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
     int64_t IntVal;
     if (getParser().parseAbsoluteExpression(IntVal))
       return MatchOperand_ParseFail;
-    if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
-      Error(S, "invalid immediate: only 32-bit values are legal");
-      return MatchOperand_ParseFail;
-    }
-
     if (Minus)
       IntVal *= -1;
-    Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
+    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
     return MatchOperand_Success;
   }
   case AsmToken::Real: {
-    // FIXME: We should emit an error if a double precisions floating-point
-    // value is used.  I'm not sure the best way to detect this.
     int64_t IntVal;
     if (getParser().parseAbsoluteExpression(IntVal))
      return MatchOperand_ParseFail;
 
-    APFloat F((float)BitsToDouble(IntVal));
+    APFloat F(BitsToDouble(IntVal));
     if (Minus)
       F.changeSign();
     Operands.push_back(
-        AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S,
+        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                  AMDGPUOperand::ImmTyNone, true));
     return MatchOperand_Success;
   }
@@ -1505,11 +1730,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
     SMLoc S = Tok.getLoc();
     const MCExpr *Expr = nullptr;
     if (!Parser.parseExpression(Expr)) {
-      Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S));
+      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
       return MatchOperand_Success;
     }
 
-    Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc()));
+    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc()));
     Parser.Lex();
     return MatchOperand_Success;
   }
@@ -1543,7 +1768,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                        SMLoc NameLoc, OperandVector &Operands) {
   // Add the instruction mnemonic
   Name = parseMnemonicSuffix(Name);
-  Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
 
   while (!getLexer().is(AsmToken::EndOfStatement)) {
     AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
@@ -1618,7 +1843,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
   return MatchOperand_Success;
 }
 
@@ -1650,7 +1875,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
     }
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
 
@@ -1825,7 +2050,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
     } while(getLexer().isNot(AsmToken::EndOfStatement));
     break;
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S));
   return MatchOperand_Success;
 }
 
@@ -1930,7 +2155,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
    }
    break;
  }
-  Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
 }
@@ -2113,7 +2338,7 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
    }
    break;
  }
-  Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
 }
@@ -2135,12 +2360,12 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
       int64_t Imm;
       if (getParser().parseAbsoluteExpression(Imm))
         return MatchOperand_ParseFail;
-      Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
       return MatchOperand_Success;
     }
 
     case AsmToken::Identifier:
-      Operands.push_back(AMDGPUOperand::CreateExpr(
+      Operands.push_back(AMDGPUOperand::CreateExpr(this,
           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                   Parser.getTok().getString()), getContext()), S));
       Parser.Lex();
@@ -2153,15 +2378,15 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
 //===----------------------------------------------------------------------===//
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySLC);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
 }
 
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
@@ -2284,23 +2509,23 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2321,11 +2546,11 @@ bool AMDGPUOperand::isSMRDLiteralOffset() const {
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2458,8 +2683,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
 
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (Op.isRegOrImmWithInputMods()) {
-      // only fp modifiers allowed in VOP3
+    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
@@ -2605,21 +2829,20 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
   }
   Parser.Lex(); // eat last token
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
-                                              AMDGPUOperand::ImmTyDppCtrl));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
   return MatchOperand_Success;
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
-  return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
+  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
-  return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
+  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
 }
 
 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
@@ -2634,8 +2857,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     // Add the register arguments
-    if (Op.isRegOrImmWithInputMods()) {
-      // Only float modifiers supported in DPP
+    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
     } else if (Op.isDPPCtrl()) {
       Op.addImmOperands(Inst, 1);
@@ -2684,7 +2906,7 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
   return MatchOperand_Success;
 }
 
@@ -2711,8 +2933,7 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
-                                              AMDGPUOperand::ImmTySdwaDstUnused));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
   return MatchOperand_Success;
 }
 
@@ -2746,8 +2967,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                Op.Reg.RegNo == AMDGPU::VCC) {
       // VOPC sdwa use "vcc" token as dst. Skip it.
       continue;
-    } else if (Op.isRegOrImmWithInputMods()) {
-      Op.addRegOrImmWithInputModsOperands(Inst, 2);
+    } else if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
+      Op.addRegOrImmWithInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
@@ -2817,14 +3038,16 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:
     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
-  case MCK_SSrc32:
+  case MCK_SSrcB32:
     // When operands have expression values, they will return true for isToken,
     // because it is not possible to distinguish between a token and an
     // expression at parse time. MatchInstructionImpl() will always try to
     // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
-    return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand;
+    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
+  case MCK_SSrcF32:
+    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
   case MCK_SoppBrTarget:
     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
   default: return Match_InvalidOperand;