//===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace { struct RISCVOperand; class RISCVAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper, Twine Msg); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override; bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; // Auto-generated instruction matching functions #define GET_ASSEMBLER_HEADER #include "RISCVGenAsmMatcher.inc" OperandMatchResultTy parseImmediate(OperandVector &Operands); OperandMatchResultTy parseRegister(OperandVector &Operands); OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands); OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands); bool parseOperand(OperandVector &Operands); public: enum RISCVMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "RISCVGenAsmMatcher.inc" #undef GET_OPERAND_DIAGNOSTIC_TYPES }; static bool classifySymbolRef(const MCExpr *Expr, RISCVMCExpr::VariantKind &Kind, int64_t &Addend); RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI, MII) { setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } }; /// RISCVOperand - Instances of this class represent a parsed machine /// instruction struct RISCVOperand : public MCParsedAsmOperand { enum KindTy { Token, Register, Immediate, } Kind; struct RegOp { unsigned RegNum; }; struct ImmOp { const MCExpr *Val; }; SMLoc StartLoc, EndLoc; union { StringRef Tok; RegOp Reg; ImmOp Imm; }; RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} public: RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { case Register: Reg = o.Reg; break; case Immediate: Imm = o.Imm; break; case Token: Tok = o.Tok; break; } } bool isToken() const override { return Kind == Token; } bool isReg() const override { return Kind == Register; } bool isImm() const override { return Kind == Immediate; } bool isMem() const override { return false; } bool evaluateConstantImm(int64_t &Imm, RISCVMCExpr::VariantKind &VK) const { const MCExpr *Val = getImm(); bool Ret = false; if (auto *RE = dyn_cast(Val)) { Ret = RE->evaluateAsConstant(Imm); VK = RE->getKind(); } else if (auto CE = dyn_cast(Val)) { Ret = true; VK = RISCVMCExpr::VK_RISCV_None; Imm = CE->getValue(); } return Ret; } // True if operand is a symbol with no modifiers, or a constant with no // modifiers and isShiftedInt(Op). template bool isBareSimmNLsb0() const { int64_t Imm; RISCVMCExpr::VariantKind VK; bool IsConstantImm = evaluateConstantImm(Imm, VK); bool IsValid; if (!IsConstantImm) IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); else IsValid = isShiftedInt(Imm); return IsValid && VK == RISCVMCExpr::VK_RISCV_None; } // Predicate methods for AsmOperands defined in RISCVInstrInfo.td /// Return true if the operand is a valid for the fence instruction e.g. /// ('iorw'). bool isFenceArg() const { if (!isImm()) return false; const MCExpr *Val = getImm(); auto *SVal = dyn_cast(Val); if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None) return false; StringRef Str = SVal->getSymbol().getName(); // Letters must be unique, taken from 'iorw', and in ascending order. This // holds as long as each individual character is one of 'iorw' and is // greater than the previous character. char Prev = '\0'; for (char c : Str) { if (c != 'i' && c != 'o' && c != 'r' && c != 'w') return false; if (c <= Prev) return false; Prev = c; } return true; } bool isUImm5() const { int64_t Imm; RISCVMCExpr::VariantKind VK; bool IsConstantImm = evaluateConstantImm(Imm, VK); return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm12() const { RISCVMCExpr::VariantKind VK; int64_t Imm; bool IsValid; bool IsConstantImm = evaluateConstantImm(Imm, VK); if (!IsConstantImm) IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); else IsValid = isInt<12>(Imm); return IsValid && (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO); } bool isUImm12() const { int64_t Imm; RISCVMCExpr::VariantKind VK; bool IsConstantImm = evaluateConstantImm(Imm, VK); return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); } bool isUImm20() const { RISCVMCExpr::VariantKind VK; int64_t Imm; bool IsValid; bool IsConstantImm = evaluateConstantImm(Imm, VK); if (!IsConstantImm) IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); else IsValid = isUInt<20>(Imm); return IsValid && (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_HI || VK == RISCVMCExpr::VK_RISCV_PCREL_HI); } bool isSImm21Lsb0() const { return isBareSimmNLsb0<21>(); } /// getStartLoc - Gets location of the first token of this operand SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Gets location of the last token of this operand SMLoc getEndLoc() const override { return EndLoc; } unsigned getReg() const override { assert(Kind == Register && "Invalid type access!"); return Reg.RegNum; } const MCExpr *getImm() const { assert(Kind == Immediate && "Invalid type access!"); return Imm.Val; } StringRef getToken() const { assert(Kind == Token && "Invalid type access!"); return Tok; } void print(raw_ostream &OS) const override { switch (Kind) { case Immediate: OS << *getImm(); break; case Register: OS << ""; break; case Token: OS << "'" << getToken() << "'"; break; } } static std::unique_ptr createToken(StringRef Str, SMLoc S) { auto Op = make_unique(Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; return Op; } static std::unique_ptr createReg(unsigned RegNo, SMLoc S, SMLoc E) { auto Op = make_unique(Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; return Op; } static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, SMLoc E) { auto Op = make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } void addExpr(MCInst &Inst, const MCExpr *Expr) const { assert(Expr && "Expr shouldn't be null!"); int64_t Imm = 0; bool IsConstant = false; if (auto *RE = dyn_cast(Expr)) { IsConstant = RE->evaluateAsConstant(Imm); } else if (auto *CE = dyn_cast(Expr)) { IsConstant = true; Imm = CE->getValue(); } if (IsConstant) Inst.addOperand(MCOperand::createImm(Imm)); else Inst.addOperand(MCOperand::createExpr(Expr)); } // Used by the TableGen Code void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getReg())); } void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } void addFenceArgOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // isFenceArg has validated the operand, meaning this cast is safe auto SE = cast(getImm()); unsigned Imm = 0; for (char c : SE->getSymbol().getName()) { switch (c) { default: llvm_unreachable("FenceArg must contain only [iorw]"); case 'i': Imm |= RISCVFenceField::I; break; case 'o': Imm |= RISCVFenceField::O; break; case 'r': Imm |= RISCVFenceField::R; break; case 'w': Imm |= RISCVFenceField::W; break; } } Inst.addOperand(MCOperand::createImm(Imm)); } }; } // end anonymous namespace. #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION #include "RISCVGenAsmMatcher.inc" bool RISCVAsmParser::generateImmOutOfRangeError( OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper, Twine Msg = "immediate must be an integer in the range") { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]"); } bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { default: break; case Match_Success: Inst.setLoc(IDLoc); Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); case Match_MnemonicFail: return Error(IDLoc, "unrecognized instruction mnemonic"); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0U) { if (ErrorInfo >= Operands.size()) return Error(ErrorLoc, "too few operands for instruction"); ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } return Error(ErrorLoc, "invalid operand for instruction"); } case Match_InvalidUImm5: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1); case Match_InvalidSImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1); case Match_InvalidUImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1); case Match_InvalidSImm13Lsb0: return generateImmOutOfRangeError( Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2, "immediate must be a multiple of 2 bytes in the range"); case Match_InvalidUImm20: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1); case Match_InvalidSImm21Lsb0: return generateImmOutOfRangeError( Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2, "immediate must be a multiple of 2 bytes in the range"); case Match_InvalidFenceArg: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return Error( ErrorLoc, "operand must be formed of letters selected in-order from 'iorw'"); } } llvm_unreachable("Unknown match type detected!"); } bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { const AsmToken &Tok = getParser().getTok(); StartLoc = Tok.getLoc(); EndLoc = Tok.getEndLoc(); RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); if (!MatchRegisterName(Name) || !MatchRegisterAltName(Name)) { getParser().Lex(); // Eat identifier token. return false; } return Error(StartLoc, "invalid register name"); } OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands) { SMLoc S = getLoc(); SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); switch (getLexer().getKind()) { default: return MatchOperand_NoMatch; case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); unsigned RegNo = MatchRegisterName(Name); if (RegNo == 0) { RegNo = MatchRegisterAltName(Name); if (RegNo == 0) return MatchOperand_NoMatch; } getLexer().Lex(); Operands.push_back(RISCVOperand::createReg(RegNo, S, E)); } return MatchOperand_Success; } OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { SMLoc S = getLoc(); SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); const MCExpr *Res; switch (getLexer().getKind()) { default: return MatchOperand_NoMatch; case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: case AsmToken::String: if (getParser().parseExpression(Res)) return MatchOperand_ParseFail; break; case AsmToken::Identifier: { StringRef Identifier; if (getParser().parseIdentifier(Identifier)) return MatchOperand_ParseFail; MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); break; } case AsmToken::Percent: return parseOperandWithModifier(Operands); } Operands.push_back(RISCVOperand::createImm(Res, S, E)); return MatchOperand_Success; } OperandMatchResultTy RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { SMLoc S = getLoc(); SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); if (getLexer().getKind() != AsmToken::Percent) { Error(getLoc(), "expected '%' for operand modifier"); return MatchOperand_ParseFail; } getParser().Lex(); // Eat '%' if (getLexer().getKind() != AsmToken::Identifier) { Error(getLoc(), "expected valid identifier for operand modifier"); return MatchOperand_ParseFail; } StringRef Identifier = getParser().getTok().getIdentifier(); RISCVMCExpr::VariantKind VK = RISCVMCExpr::getVariantKindForName(Identifier); if (VK == RISCVMCExpr::VK_RISCV_Invalid) { Error(getLoc(), "unrecognized operand modifier"); return MatchOperand_ParseFail; } getParser().Lex(); // Eat the identifier if (getLexer().getKind() != AsmToken::LParen) { Error(getLoc(), "expected '('"); return MatchOperand_ParseFail; } getParser().Lex(); // Eat '(' const MCExpr *SubExpr; if (getParser().parseParenExpression(SubExpr, E)) { return MatchOperand_ParseFail; } const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext()); Operands.push_back(RISCVOperand::createImm(ModExpr, S, E)); return MatchOperand_Success; } OperandMatchResultTy RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::LParen)) { Error(getLoc(), "expected '('"); return MatchOperand_ParseFail; } getParser().Lex(); // Eat '(' Operands.push_back(RISCVOperand::createToken("(", getLoc())); if (parseRegister(Operands) != MatchOperand_Success) { Error(getLoc(), "expected register"); return MatchOperand_ParseFail; } if (getLexer().isNot(AsmToken::RParen)) { Error(getLoc(), "expected ')'"); return MatchOperand_ParseFail; } getParser().Lex(); // Eat ')' Operands.push_back(RISCVOperand::createToken(")", getLoc())); return MatchOperand_Success; } /// Looks at a token type and creates the relevant operand /// from this information, adding to Operands. /// If operand was parsed, returns false, else true. bool RISCVAsmParser::parseOperand(OperandVector &Operands) { // Attempt to parse token as register if (parseRegister(Operands) == MatchOperand_Success) return false; // Attempt to parse token as an immediate if (parseImmediate(Operands) == MatchOperand_Success) { // Parse memory base register if present if (getLexer().is(AsmToken::LParen)) return parseMemOpBaseReg(Operands) != MatchOperand_Success; return false; } // Finally we have exhausted all options and must declare defeat. Error(getLoc(), "unknown operand"); return true; } bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // First operand is token for instruction Operands.push_back(RISCVOperand::createToken(Name, NameLoc)); // If there are no more operands, then finish if (getLexer().is(AsmToken::EndOfStatement)) return false; // Parse first operand if (parseOperand(Operands)) return true; // Parse until end of statement, consuming commas between operands while (getLexer().is(AsmToken::Comma)) { // Consume comma token getLexer().Lex(); // Parse next operand if (parseOperand(Operands)) return true; } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); getParser().eatToEndOfStatement(); return Error(Loc, "unexpected token"); } getParser().Lex(); // Consume the EndOfStatement. return false; } bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr, RISCVMCExpr::VariantKind &Kind, int64_t &Addend) { Kind = RISCVMCExpr::VK_RISCV_None; Addend = 0; if (const RISCVMCExpr *RE = dyn_cast(Expr)) { Kind = RE->getKind(); Expr = RE->getSubExpr(); } // It's a simple symbol reference or constant with no addend. if (isa(Expr) || isa(Expr)) return true; const MCBinaryExpr *BE = dyn_cast(Expr); if (!BE) return false; if (!isa(BE->getLHS())) return false; if (BE->getOpcode() != MCBinaryExpr::Add && BE->getOpcode() != MCBinaryExpr::Sub) return false; // We are able to support the subtraction of two symbol references if (BE->getOpcode() == MCBinaryExpr::Sub && isa(BE->getRHS())) return true; // See if the addend is is a constant, otherwise there's more going // on here than we can deal with. auto AddendExpr = dyn_cast(BE->getRHS()); if (!AddendExpr) return false; Addend = AddendExpr->getValue(); if (BE->getOpcode() == MCBinaryExpr::Sub) Addend = -Addend; // It's some symbol reference + a constant addend return Kind != RISCVMCExpr::VK_RISCV_Invalid; } bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } extern "C" void LLVMInitializeRISCVAsmParser() { RegisterMCAsmParser X(getTheRISCV32Target()); RegisterMCAsmParser Y(getTheRISCV64Target()); }