diff options
23 files changed, 1764 insertions, 519 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 2845c245689..702335b58d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -49,13 +49,6 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; -// 32-bit VALU immediate operand that uses the constant bus. -def u32kimm : Operand<i32> { - let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_KIMM32"; - let PrintMethod = "printU32ImmOperand"; -} - let OperandType = "OPERAND_IMMEDIATE" in { def u32imm : Operand<i32> { diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2f0e6027c7f..ae09831b916 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -43,10 +44,15 @@ using namespace llvm; namespace { +class AMDGPUAsmParser; struct OptionalOperand; enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; +//===----------------------------------------------------------------------===// +// Operand +//===----------------------------------------------------------------------===// + class AMDGPUOperand : public MCParsedAsmOperand { enum KindTy { Token, @@ -56,9 +62,11 @@ class AMDGPUOperand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; + const AMDGPUAsmParser *AsmParser; public: - AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} + AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_) + : MCParsedAsmOperand(), 
Kind(Kind_), AsmParser(AsmParser_) {} typedef std::unique_ptr<AMDGPUOperand> Ptr; @@ -143,8 +151,6 @@ public: }; struct RegOp { - const MCRegisterInfo *TRI; - const MCSubtargetInfo *STI; unsigned RegNo; bool IsForcedVOP3; Modifiers Mods; @@ -175,20 +181,8 @@ public: return Kind == Immediate; } - bool isInlinableImm() const { - if (!isImmTy(ImmTyNone)) { - // Only plain immediates are inlinable (e.g. "clamp" attribute is not) - return false; - } - // TODO: We should avoid using host float here. It would be better to - // check the float bit values which is what a few other places do. - // We've had bot failures before due to weird NaN support on mips hosts. - const float F = BitsToFloat(Imm.Val); - // TODO: Add 1/(2*pi) for VI - return (Imm.Val <= 64 && Imm.Val >= -16) || - (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || - F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0); - } + bool isInlinableImm(MVT type) const; + bool isLiteralImm(MVT type) const; bool isRegKind() const { return Kind == Register; @@ -198,8 +192,24 @@ public: return isRegKind() && !Reg.Mods.hasModifiers(); } - bool isRegOrImmWithInputMods() const { - return isRegKind() || isInlinableImm(); + bool isRegOrImmWithInputMods(MVT type) const { + return isRegKind() || isInlinableImm(type); + } + + bool isRegOrImmWithInt32InputMods() const { + return isRegOrImmWithInputMods(MVT::i32); + } + + bool isRegOrImmWithInt64InputMods() const { + return isRegOrImmWithInputMods(MVT::i64); + } + + bool isRegOrImmWithFP32InputMods() const { + return isRegOrImmWithInputMods(MVT::f32); + } + + bool isRegOrImmWithFP64InputMods() const { + return isRegOrImmWithInputMods(MVT::f64); } bool isImmTy(ImmTy ImmT) const { @@ -243,47 +253,76 @@ public: return isReg() || isImm(); } - bool isRegClass(unsigned RCID) const { - return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg()); + bool isRegClass(unsigned RCID) const; + + bool isSCSrcB32() const { + return isRegClass(AMDGPU::SReg_32RegClassID) || 
isInlinableImm(MVT::i32); + } + + bool isSCSrcB64() const { + return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64); } - bool isSCSrc32() const { - return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID); + bool isSCSrcF32() const { + return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32); } - bool isSCSrc64() const { - return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID); + bool isSCSrcF64() const { + return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::f64); } - bool isSSrc32() const { - return isImm() || isSCSrc32() || isExpr(); + bool isSSrcB32() const { + return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); } - bool isSSrc64() const { + bool isSSrcB64() const { // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. // See isVSrc64(). - return isImm() || isSCSrc64(); + return isSCSrcB64() || isLiteralImm(MVT::i64); } - bool isVCSrc32() const { - return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID); + bool isSSrcF32() const { + return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); } - bool isVCSrc64() const { - return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID); + bool isSSrcF64() const { + return isSCSrcB64() || isLiteralImm(MVT::f64); } - bool isVSrc32() const { - return isImm() || isVCSrc32(); + bool isVCSrcB32() const { + return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32); } - bool isVSrc64() const { - // TODO: Check if the 64-bit value (coming from assembly source) can be - // narrowed to 32 bits (in the instruction stream). That require knowledge - // of instruction type (unsigned/signed, floating or "untyped"/B64), - // see [AMD GCN3 ISA 6.3.1]. - // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns? 
- return isImm() || isVCSrc64(); + bool isVCSrcB64() const { + return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64); + } + + bool isVCSrcF32() const { + return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32); + } + + bool isVCSrcF64() const { + return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64); + } + + bool isVSrcB32() const { + return isVCSrcF32() || isLiteralImm(MVT::i32); + } + + bool isVSrcB64() const { + return isVCSrcF64() || isLiteralImm(MVT::i64); + } + + bool isVSrcF32() const { + return isVCSrcF32() || isLiteralImm(MVT::f32); + } + + bool isVSrcF64() const { + return isVCSrcF64() || isLiteralImm(MVT::f64); + } + + bool isKImmFP32() const { + return isLiteralImm(MVT::f32); } bool isMem() const override { @@ -368,29 +407,13 @@ public: return getModifiers().hasIntModifiers(); } - void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const { - if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) { - // Apply modifiers to immediate value - int64_t Val = Imm.Val; - bool Negate = Imm.Mods.Neg; // Only negate can get here - if (Imm.IsFPImm) { - APFloat F(BitsToFloat(Val)); - if (Negate) { - F.changeSign(); - } - Val = F.bitcastToAPInt().getZExtValue(); - } else { - Val = Negate ? 
-Val : Val; - } - Inst.addOperand(MCOperand::createImm(Val)); - } else { - Inst.addOperand(MCOperand::createImm(getImm())); - } - } + void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; - void addRegOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI))); - } + void addLiteralImmOperand(MCInst &Inst, int64_t Val) const; + + void addKImmFP32Operands(MCInst &Inst, unsigned N) const; + + void addRegOperands(MCInst &Inst, unsigned N) const; void addRegOrImmOperands(MCInst &Inst, unsigned N) const { if (isRegKind()) @@ -484,10 +507,11 @@ public: } } - static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc, + static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, + int64_t Val, SMLoc Loc, enum ImmTy Type = ImmTyNone, bool IsFPImm = false) { - auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); + auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); Op->Imm.Val = Val; Op->Imm.IsFPImm = IsFPImm; Op->Imm.Type = Type; @@ -497,9 +521,10 @@ public: return Op; } - static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc, + static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, + StringRef Str, SMLoc Loc, bool HasExplicitEncodingSize = true) { - auto Res = llvm::make_unique<AMDGPUOperand>(Token); + auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); Res->StartLoc = Loc; @@ -507,15 +532,12 @@ public: return Res; } - static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S, + static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, + unsigned RegNo, SMLoc S, SMLoc E, - const MCRegisterInfo *TRI, - const MCSubtargetInfo *STI, bool ForceVOP3) { - auto Op = llvm::make_unique<AMDGPUOperand>(Register); + auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); Op->Reg.RegNo = RegNo; - Op->Reg.TRI = TRI; - Op->Reg.STI = STI; Op->Reg.Mods = {false, 
false, false}; Op->Reg.IsForcedVOP3 = ForceVOP3; Op->StartLoc = S; @@ -523,8 +545,9 @@ public: return Op; } - static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) { - auto Op = llvm::make_unique<AMDGPUOperand>(Expression); + static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, + const class MCExpr *Expr, SMLoc S) { + auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); Op->Expr = Expr; Op->StartLoc = S; Op->EndLoc = S; @@ -537,6 +560,10 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { return OS; } +//===----------------------------------------------------------------------===// +// AsmParser +//===----------------------------------------------------------------------===// + class AMDGPUAsmParser : public MCTargetAsmParser { const MCInstrInfo &MII; MCAsmParser &Parser; @@ -545,22 +572,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool ForcedDPP; bool ForcedSDWA; - bool isSI() const { - return AMDGPU::isSI(getSTI()); - } - - bool isCI() const { - return AMDGPU::isCI(getSTI()); - } - - bool isVI() const { - return AMDGPU::isVI(getSTI()); - } - - bool hasSGPR102_SGPR103() const { - return !isVI(); - } - /// @name Auto-generated Match Functions /// { @@ -624,11 +635,37 @@ public: } } + bool isSI() const { + return AMDGPU::isSI(getSTI()); + } + + bool isCI() const { + return AMDGPU::isCI(getSTI()); + } + + bool isVI() const { + return AMDGPU::isVI(getSTI()); + } + + bool hasSGPR102_SGPR103() const { + return !isVI(); + } + AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<AMDGPUTargetStreamer &>(TS); } + const MCRegisterInfo *getMRI() const { + // We need this const_cast because for some reason getContext() is not const + // in MCAsmParser. 
+ return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); + } + + const MCInstrInfo *getMII() const { + return &MII; + } + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } @@ -652,6 +689,7 @@ public: StringRef parseMnemonicSuffix(StringRef Name); bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; + //bool ProcessInstruction(MCInst &Inst); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, @@ -738,6 +776,202 @@ struct OptionalOperand { } +//===----------------------------------------------------------------------===// +// Operand +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isInlinableImm(MVT type) const { + if (!isImmTy(ImmTyNone)) { + // Only plain immediates are inlinable (e.g. "clamp" attribute is not) + return false; + } + // TODO: We should avoid using host float here. It would be better to + // check the float bit values which is what a few other places do. + // We've had bot failures before due to weird NaN support on mips hosts. 
+ + APInt Literal(64, Imm.Val); + + if (Imm.IsFPImm) { // We got fp literal token + if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand + return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI()); + } else { // Expected 32-bit operand + bool lost; + APFloat FPLiteral(APFloat::IEEEdouble, Literal); + // Convert literal to single precision + APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle, + APFloat::rmNearestTiesToEven, + &lost); + // We allow precision loss but not overflow or underflow + if (status != APFloat::opOK && + lost && + ((status & APFloat::opOverflow) != 0 || + (status & APFloat::opUnderflow) != 0)) { + return false; + } + // Check if single precision literal is inlinable + return AMDGPU::isInlinableLiteral32( + static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), + AsmParser->isVI()); + } + } else { // We got int literal token + if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand + return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI()); + } else { // Expected 32-bit operand + return AMDGPU::isInlinableLiteral32( + static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), + AsmParser->isVI()); + } + } + return false; +} + +bool AMDGPUOperand::isLiteralImm(MVT type) const { + // Check that this immediate can be added as literal + if (!isImmTy(ImmTyNone)) { + return false; + } + + APInt Literal(64, Imm.Val); + + if (Imm.IsFPImm) { // We got fp literal token + if (type == MVT::f64) { // Expected 64-bit fp operand + // We would set low 32 bits of literal to zeroes but we accept these literals + return true; + } else if (type == MVT::i64) { // Expected 64-bit int operand + // We don't allow fp literals in 64-bit integer instructions. It is + // unclear how we should encode them. 
+ return false; + } else { // Expected 32-bit operand + bool lost; + APFloat FPLiteral(APFloat::IEEEdouble, Literal); + // Convert literal to single precision + APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle, + APFloat::rmNearestTiesToEven, + &lost); + // We allow precision loss but not overflow or underflow + if (status != APFloat::opOK && + lost && + ((status & APFloat::opOverflow) != 0 || + (status & APFloat::opUnderflow) != 0)) { + return false; + } + return true; + } + } else { // We got int literal token + APInt HiBits = Literal.getHiBits(32); + if (HiBits == 0xffffffff && + (*Literal.getLoBits(32).getRawData() & 0x80000000) != 0) { + // If high 32 bits aren't zeroes then they all should be ones and 32nd + // bit should be set. So that this 64-bit literal is sign-extension of + // 32-bit value. + return true; + } else if (HiBits == 0) { + return true; + } + } + return false; +} + +bool AMDGPUOperand::isRegClass(unsigned RCID) const { + return isReg() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); +} + +void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { + int64_t Val = Imm.Val; + if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) { + // Apply modifiers to immediate value. 
Only negate can get here + if (Imm.IsFPImm) { + APFloat F(BitsToDouble(Val)); + F.changeSign(); + Val = F.bitcastToAPInt().getZExtValue(); + } else { + Val = -Val; + } + } + + if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) { + addLiteralImmOperand(Inst, Val); + } else { + Inst.addOperand(MCOperand::createImm(Val)); + } +} + +void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const { + const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); + auto OpNum = Inst.getNumOperands(); + // Check that this operand accepts literals + assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); + + APInt Literal(64, Val); + auto OpSize = AMDGPU::getRegOperandSize(AsmParser->getMRI(), InstDesc, OpNum); // expected operand size + + if (Imm.IsFPImm) { // We got fp literal token + if (OpSize == 8) { // Expected 64-bit operand + // Check if literal is inlinable + if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->isVI())) { + Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); + } else if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand + // For fp operands we check if low 32 bits are zeros + if (Literal.getLoBits(32) != 0) { + const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), + "Can't encode literal as exact 64-bit" + " floating-point operand. Low 32-bits will be" + " set to zero"); + } + Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); + } else { + // We don't allow fp literals in 64-bit integer instructions. It is + // unclear how we should encode them. 
This case should be checked earlier + // in predicate methods (isLiteralImm()) + llvm_unreachable("fp literal in 64-bit integer instruction."); + } + } else { // Expected 32-bit operand + bool lost; + APFloat FPLiteral(APFloat::IEEEdouble, Literal); + // Convert literal to single precision + FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + // We allow precision loss but not overflow or underflow. This should be + // checked earlier in isLiteralImm() + Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); + } + } else { // We got int literal token + if (OpSize == 8) { // Expected 64-bit operand + auto LiteralVal = Literal.getZExtValue(); + if (AMDGPU::isInlinableLiteral64(LiteralVal, AsmParser->isVI())) { + Inst.addOperand(MCOperand::createImm(LiteralVal)); + return; + } + } else { // Expected 32-bit operand + auto LiteralVal = static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()); + if (AMDGPU::isInlinableLiteral32(LiteralVal, AsmParser->isVI())) { + Inst.addOperand(MCOperand::createImm(LiteralVal)); + return; + } + } + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); + } +} + +void AMDGPUOperand::addKImmFP32Operands(MCInst &Inst, unsigned N) const { + APInt Literal(64, Imm.Val); + if (Imm.IsFPImm) { // We got fp literal + bool lost; + APFloat FPLiteral(APFloat::IEEEdouble, Literal); + FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); + } else { // We got int literal token + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); + } +} + +void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); +} + +//===----------------------------------------------------------------------===// +// AsmParser 
+//===----------------------------------------------------------------------===// + static int getRegClass(RegisterKind Is, unsigned RegWidth) { if (Is == IS_VGPR) { switch (RegWidth) { @@ -952,20 +1186,18 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { const auto &Tok = Parser.getTok(); SMLoc StartLoc = Tok.getLoc(); SMLoc EndLoc = Tok.getEndLoc(); - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - RegisterKind RegKind; unsigned Reg, RegNum, RegWidth; if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { return nullptr; } - return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc, - TRI, &getSTI(), false); + return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); } AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseImm(OperandVector &Operands) { + // TODO: add syntactic sugar for 1/(2*PI) bool Minus = false; if (getLexer().getKind() == AsmToken::Minus) { Minus = true; @@ -978,28 +1210,21 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) { int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) { - Error(S, "invalid immediate: only 32-bit values are legal"); - return MatchOperand_ParseFail; - } - if (Minus) IntVal *= -1; - Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); + Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); return MatchOperand_Success; } case AsmToken::Real: { - // FIXME: We should emit an error if a double precisions floating-point - // value is used. I'm not sure the best way to detect this. 
int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - APFloat F((float)BitsToDouble(IntVal)); + APFloat F(BitsToDouble(IntVal)); if (Minus) F.changeSign(); Operands.push_back( - AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S, + AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S, AMDGPUOperand::ImmTyNone, true)); return MatchOperand_Success; } @@ -1505,11 +1730,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { SMLoc S = Tok.getLoc(); const MCExpr *Expr = nullptr; if (!Parser.parseExpression(Expr)) { - Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S)); + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); return MatchOperand_Success; } - Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc())); + Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc())); Parser.Lex(); return MatchOperand_Success; } @@ -1543,7 +1768,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, SMLoc NameLoc, OperandVector &Operands) { // Add the instruction mnemonic Name = parseMnemonicSuffix(Name); - Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); + Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); while (!getLexer().is(AsmToken::EndOfStatement)) { AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); @@ -1618,7 +1843,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, return MatchOperand_ParseFail; } - Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); return MatchOperand_Success; } @@ -1650,7 +1875,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, } } - Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); return 
MatchOperand_Success; } @@ -1825,7 +2050,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S)); + Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S)); return MatchOperand_Success; } @@ -1930,7 +2155,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { } break; } - Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); return MatchOperand_Success; } @@ -2113,7 +2338,7 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { } break; } - Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); return MatchOperand_Success; } @@ -2135,12 +2360,12 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { int64_t Imm; if (getParser().parseAbsoluteExpression(Imm)) return MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(Imm, S)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); return MatchOperand_Success; } case AsmToken::Identifier: - Operands.push_back(AMDGPUOperand::CreateExpr( + Operands.push_back(AMDGPUOperand::CreateExpr(this, MCSymbolRefExpr::create(getContext().getOrCreateSymbol( Parser.getTok().getString()), getContext()), S)); Parser.Lex(); @@ -2153,15 +2378,15 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { //===----------------------------------------------------------------------===// AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), 
AMDGPUOperand::ImmTySLC); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE); } void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, @@ -2284,23 +2509,23 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE); } //===----------------------------------------------------------------------===// @@ -2321,11 +2546,11 @@ bool AMDGPUOperand::isSMRDLiteralOffset() const { } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { - return 
AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } //===----------------------------------------------------------------------===// @@ -2458,8 +2683,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (Op.isRegOrImmWithInputMods()) { - // only fp modifiers allowed in VOP3 + if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) { Op.addRegOrImmWithFPInputModsOperands(Inst, 2); } else if (Op.isImm()) { OptionalIdx[Op.getImmTy()] = I; @@ -2605,21 +2829,20 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { } Parser.Lex(); // eat last token - Operands.push_back(AMDGPUOperand::CreateImm(Int, S, - AMDGPUOperand::ImmTyDppCtrl)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); return MatchOperand_Success; } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { - return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); + return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { - return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); + return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { - return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); } void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { @@ -2634,8 +2857,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 
// Add the register arguments - if (Op.isRegOrImmWithInputMods()) { - // Only float modifiers supported in DPP + if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) { Op.addRegOrImmWithFPInputModsOperands(Inst, 2); } else if (Op.isDPPCtrl()) { Op.addImmOperands(Inst, 1); @@ -2684,7 +2906,7 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, return MatchOperand_ParseFail; } - Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); return MatchOperand_Success; } @@ -2711,8 +2933,7 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { return MatchOperand_ParseFail; } - Operands.push_back(AMDGPUOperand::CreateImm(Int, S, - AMDGPUOperand::ImmTySdwaDstUnused)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); return MatchOperand_Success; } @@ -2746,8 +2967,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, Op.Reg.RegNo == AMDGPU::VCC) { // VOPC sdwa use "vcc" token as dst. Skip it. continue; - } else if (Op.isRegOrImmWithInputMods()) { - Op.addRegOrImmWithInputModsOperands(Inst, 2); + } else if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) { + Op.addRegOrImmWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments OptionalIdx[Op.getImmTy()] = I; @@ -2817,14 +3038,16 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; case MCK_offen: return Operand.isOffen() ? Match_Success : Match_InvalidOperand; - case MCK_SSrc32: + case MCK_SSrcB32: // When operands have expression values, they will return true for isToken, // because it is not possible to distinguish between a token and an // expression at parse time. 
MatchInstructionImpl() will always try to // match an operand as a token, when isToken returns true, and when the // name of the expression is not a valid token, the match will fail, // so we need to handle it here. - return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand; + return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; + case MCK_SSrcF32: + return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; case MCK_SoppBrTarget: return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; default: return Match_InvalidOperand; diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 7dd0f009533..6556830f833 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -359,7 +359,7 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { else if (Imm == DoubleToBits(-4.0)) O << "-4.0"; else { - assert(isUInt<32>(Imm)); + assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882); // In rare situations, we will have a 32-bit literal in a 64-bit // operand. This is technically allowed for the encoding of s_mov_b64. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index ab7c830bd4f..2e5286e1b3b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" @@ -38,11 +39,9 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - /// \brief Can this operand also contain immediate values? 
- bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; - /// \brief Encode an fp or int literal - uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const; + uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize, + const MCSubtargetInfo &STI) const; public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, @@ -76,14 +75,6 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, return new SIMCCodeEmitter(MCII, MRI, Ctx); } -bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc, - unsigned OpNo) const { - unsigned OpType = Desc.OpInfo[OpNo].OperandType; - - return OpType == AMDGPU::OPERAND_REG_IMM32 || - OpType == AMDGPU::OPERAND_REG_INLINE_C; -} - // Returns the encoding value to use if the given integer is an integer inline // immediate value, or 0 if it is not. template <typename IntTy> @@ -97,7 +88,7 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) { return 0; } -static uint32_t getLit32Encoding(uint32_t Val) { +static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) { uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val)); if (IntImm != 0) return IntImm; @@ -126,10 +117,13 @@ static uint32_t getLit32Encoding(uint32_t Val) { if (Val == FloatToBits(-4.0f)) return 247; + if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi) + return 248; + return 255; } -static uint32_t getLit64Encoding(uint64_t Val) { +static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) { uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val)); if (IntImm != 0) return IntImm; @@ -158,11 +152,15 @@ static uint32_t getLit64Encoding(uint64_t Val) { if (Val == DoubleToBits(-4.0)) return 247; + if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi) + return 248; + return 255; } uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, - unsigned OpSize) const { + unsigned OpSize, + const MCSubtargetInfo &STI) const { int64_t Imm; if (MO.isExpr()) { 
@@ -182,11 +180,11 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, } if (OpSize == 4) - return getLit32Encoding(static_cast<uint32_t>(Imm)); + return getLit32Encoding(static_cast<uint32_t>(Imm), STI); assert(OpSize == 8); - return getLit64Encoding(static_cast<uint64_t>(Imm)); + return getLit64Encoding(static_cast<uint64_t>(Imm), STI); } void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -208,7 +206,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { // Check if this operand should be encoded as [SV]Src - if (!isSrcOperand(Desc, i)) + if (!AMDGPU::isSISrcOperand(Desc, i)) continue; int RCID = Desc.OpInfo[i].RegClass; @@ -216,7 +214,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Is this operand a literal immediate? const MCOperand &Op = MI.getOperand(i); - if (getLitEncoding(Op, RC.getSize()) != 255) + if (getLitEncoding(Op, RC.getSize(), STI) != 255) continue; // Yes! 
Encode it @@ -280,11 +278,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, } const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); - if (isSrcOperand(Desc, OpNo)) { - int RCID = Desc.OpInfo[OpNo].RegClass; - const MCRegisterClass &RC = MRI.getRegClass(RCID); - - uint32_t Enc = getLitEncoding(MO, RC.getSize()); + if (AMDGPU::isSISrcOperand(Desc, OpNo)) { + uint32_t Enc = getLitEncoding(MO, + AMDGPU::getRegOperandSize(&MRI, Desc, OpNo), + STI); if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) return Enc; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 7eac83c655e..744c2e2f4e4 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -49,14 +49,17 @@ enum { namespace llvm { namespace AMDGPU { enum OperandType { - /// Operand with register or 32-bit immediate - OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET, - /// Operand with register or inline constant - OPERAND_REG_INLINE_C, - - /// Operand with 32-bit immediate that uses the constant bus. The standard - /// OPERAND_IMMEDIATE should be used for special immediates such as source - /// modifiers. + /// Operands with register or 32-bit immediate + OPERAND_REG_IMM32_INT = MCOI::OPERAND_FIRST_TARGET, + OPERAND_REG_IMM32_FP, + /// Operands with register or inline constant + OPERAND_REG_INLINE_C_INT, + OPERAND_REG_INLINE_C_FP, + + // Operand for source modifiers for VOP instructions + OPERAND_INPUT_MODS, + + /// Operand with 32-bit immediate that uses the constant bus. 
OPERAND_KIMM32 }; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4c69a39312e..c84847f2e0e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1689,9 +1689,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, return false; } break; - case AMDGPU::OPERAND_REG_IMM32: + case AMDGPU::OPERAND_REG_IMM32_INT: + case AMDGPU::OPERAND_REG_IMM32_FP: break; - case AMDGPU::OPERAND_REG_INLINE_C: + case AMDGPU::OPERAND_REG_INLINE_C_INT: + case AMDGPU::OPERAND_REG_INLINE_C_FP: if (isLiteralConstant(MI.getOperand(i), RI.getRegClass(RegClass)->getSize())) { ErrInfo = "Illegal immediate value for operand."; @@ -2030,8 +2032,8 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, // In order to be legal, the common sub-class must be equal to the // class of the current operand. For example: // - // v_mov_b32 s0 ; Operand defined as vsrc_32 - // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL + // v_mov_b32 s0 ; Operand defined as vsrc_b32 + // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL // // s_sendmsg 0, s0 ; Operand defined as m0reg // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ad129b80a40..f68eef29ffc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -454,29 +454,56 @@ def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; } // End OperandType = "OPERAND_IMMEDIATE" +// 32-bit VALU immediate operand that uses the constant bus. 
+def KImmFP32MatchClass : AsmOperandClass { + let Name = "KImmFP32"; + let PredicateMethod = "isKImmFP32"; + let ParserMethod = "parseImm"; + let RenderMethod = "addKImmFP32Operands"; +} + +def f32kimm : Operand<i32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_KIMM32"; + let PrintMethod = "printU32ImmOperand"; + let ParserMatchClass = KImmFP32MatchClass; +} + def VOPDstS64 : VOPDstOperand <SReg_64>; -def FPInputModsMatchClass : AsmOperandClass { - let Name = "RegOrImmWithFPInputMods"; +class FPInputModsMatchClass <int opSize> : AsmOperandClass { + let Name = "RegOrImmWithFP"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithFPInputMods"; - let PredicateMethod = "isRegOrImmWithInputMods"; + let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods"; } +def FP32InputModsMatchClass : FPInputModsMatchClass<32>; +def FP64InputModsMatchClass : FPInputModsMatchClass<64>; -def FPInputMods : Operand <i32> { +class InputMods <AsmOperandClass matchClass> : Operand <i32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_INPUT_MODS"; + let ParserMatchClass = matchClass; +} + +class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> { let PrintMethod = "printOperandAndFPInputMods"; - let ParserMatchClass = FPInputModsMatchClass; } +def FP32InputMods : FPInputMods<FP32InputModsMatchClass>; +def FP64InputMods : FPInputMods<FP64InputModsMatchClass>; -def IntInputModsMatchClass : AsmOperandClass { - let Name = "RegOrImmWithIntInputMods"; +class IntInputModsMatchClass <int opSize> : AsmOperandClass { + let Name = "RegOrImmWithInt"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithIntInputMods"; - let PredicateMethod = "isRegOrImmWithInputMods"; + let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods"; } +def Int32InputModsMatchClass : IntInputModsMatchClass<32>; +def Int64InputModsMatchClass : IntInputModsMatchClass<64>; -def IntInputMods: Operand <i32> { +class IntInputMods <IntInputModsMatchClass 
matchClass> : InputMods <matchClass> { let PrintMethod = "printOperandAndIntInputMods"; - let ParserMatchClass = IntInputModsMatchClass; } +def Int32InputMods : IntInputMods<Int32InputModsMatchClass>; +def Int64InputMods : IntInputMods<Int64InputModsMatchClass>; //===----------------------------------------------------------------------===// // Complex patterns @@ -605,7 +632,13 @@ class getVALUDstForVT<ValueType VT> { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT<ValueType VT> { - RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32); + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + RegisterOperand ret = !if(isFP, + !if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32), + !if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32)); } // Returns the vreg register class to use for source operand given VT @@ -617,14 +650,22 @@ class getVregSrcForVT<ValueType VT> { // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT<ValueType VT> { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); RegisterOperand ret = - !if(!eq(VT.Size, 64), - VCSrc_64, - !if(!eq(VT.Value, i1.Value), - SCSrc_64, - VCSrc_32 - ) - ); + !if(!eq(VT.Size, 64), + !if(isFP, + VCSrc_f64, + VCSrc_b64), + !if(!eq(VT.Value, i1.Value), + SCSrc_b64, + !if(isFP, + VCSrc_f32, + VCSrc_b32) + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. 
@@ -636,6 +677,17 @@ class hasModifiers<ValueType SrcVT> { 0)); } +// Return type of input modifiers operand for specified input operand +class getSrcMod <ValueType VT> { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + Operand ret = !if(!eq(VT.Size, 64), + !if(isFP, FP64InputMods, Int64InputMods), + !if(isFP, FP32InputMods, Int32InputMods)); +} + // Returns the input arguments for VOP[12C] instructions for the given SrcVT. class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 @@ -646,7 +698,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> { // Returns the input arguments for VOP3 instructions for the given SrcVT. class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, - bit HasModifiers> { + bit HasModifiers, Operand Src0Mod, Operand Src1Mod, + Operand Src2Mod> { dag ret = !if (!eq(NumSrcArgs, 0), @@ -656,7 +709,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, !if (!eq(NumSrcArgs, 1), !if (!eq(HasModifiers, 1), // VOP1 with modifiers - (ins FPInputMods:$src0_modifiers, Src0RC:$src0, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod:$clamp, omod:$omod) /* else */, // VOP1 without modifiers @@ -665,8 +718,8 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, !if (!eq(NumSrcArgs, 2), !if (!eq(HasModifiers, 1), // VOP 2 with modifiers - (ins FPInputMods:$src0_modifiers, Src0RC:$src0, - FPInputMods:$src1_modifiers, Src1RC:$src1, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, clampmod:$clamp, omod:$omod) /* else */, // VOP2 without modifiers @@ -675,9 +728,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, /* NumSrcArgs == 3 */, !if (!eq(HasModifiers, 1), // VOP3 with modifiers - (ins FPInputMods:$src0_modifiers, Src0RC:$src0, - 
FPInputMods:$src1_modifiers, Src1RC:$src1, - FPInputMods:$src2_modifiers, Src2RC:$src2, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2, clampmod:$clamp, omod:$omod) /* else */, // VOP3 without modifiers @@ -686,7 +739,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, } class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, - bit HasModifiers> { + bit HasModifiers, Operand Src0Mod, Operand Src1Mod> { dag ret = !if (!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) @@ -695,7 +748,7 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, !if (!eq(NumSrcArgs, 1), !if (!eq(HasModifiers, 1), // VOP1_DPP with modifiers - (ins FPInputMods:$src0_modifiers, Src0RC:$src0, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, @@ -706,8 +759,8 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, /* NumSrcArgs == 2 */, !if (!eq(HasModifiers, 1), // VOP2_DPP with modifiers - (ins FPInputMods:$src0_modifiers, Src0RC:$src0, - FPInputMods:$src1_modifiers, Src1RC:$src1, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl) /* else */, @@ -719,48 +772,45 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, } class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs, - bit HasFloatModifiers, ValueType DstVT> { + bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod, + ValueType DstVT> { dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) (ins), !if(!eq(NumSrcArgs, 1), - !if(HasFloatModifiers, + !if(HasFloatModifiers, // VOP1_SDWA with float modifiers - (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, + (ins Src0Mod:$src0_fmodifiers, 
Src0RC:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel) - /* else */, - // VOP1_SDWA with sext modifier - (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, + src0_sel:$src0_sel), + // VOP1_SDWA with int modifiers + (ins Src0Mod:$src0_imodifiers, Src0RC:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel) - /* endif */) + src0_sel:$src0_sel)) /* NumSrcArgs == 2 */, !if(HasFloatModifiers, !if(!eq(DstVT.Size, 1), // VOPC_SDWA with float modifiers - (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, - FPInputMods:$src1_fmodifiers, Src1RC:$src1, + (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0, + Src1Mod:$src1_fmodifiers, Src1RC:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), // VOP2_SDWA or VOPC_SDWA with float modifiers - (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, - FPInputMods:$src1_fmodifiers, Src1RC:$src1, + (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0, + Src1Mod:$src1_fmodifiers, Src1RC:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel) - ), - /* else */ - !if(!eq(DstVT.Size, 1), - // VOPC_SDWA with sext modifiers - (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, - IntInputMods:$src1_imodifiers, Src1RC:$src1, - clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), - // VOP2_SDWA or VOPC_SDWA with sext modifier - (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, - IntInputMods:$src1_imodifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel) - ) + src0_sel:$src0_sel, src1_sel:$src1_sel)), + + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA with int modifiers + (ins Src0Mod:$src0_imodifiers, Src0RC:$src0, + Src1Mod:$src1_imodifiers, Src1RC:$src1, + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA or VOPC_SDWA with int modifiers + (ins Src0Mod:$src0_imodifiers, Src0RC:$src0, + Src1Mod:$src1_imodifiers, Src1RC:$src1, + 
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel)) /* endif */))); } @@ -885,6 +935,9 @@ class VOPProfile <list<ValueType> _ArgVT> { field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret; field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret; + field Operand Src0Mod = getSrcMod<Src0VT>.ret; + field Operand Src1Mod = getSrcMod<Src1VT>.ret; + field Operand Src2Mod = getSrcMod<Src2VT>.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); field bit HasDst32 = HasDst; @@ -904,9 +957,11 @@ class VOPProfile <list<ValueType> _ArgVT> { field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, - HasModifiers>.ret; - field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret; - field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers, DstVT>.ret; + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret; + field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, + HasModifiers, Src0Mod, Src1Mod>.ret; + field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, + HasModifiers, Src0Mod, Src1Mod, DstVT>.ret; field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; @@ -968,12 +1023,12 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Src0RC64 = VOPDstOperand<VGPR_32>; let Outs = (outs); - let Ins32 = (ins Src0RC32:$vdst, VSrc_32:$src0); - let Ins64 = (ins Src0RC64:$vdst, VSrc_32:$src0); + let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0); + let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, IntInputMods:$src0_imodifiers, VCSrc_32:$src0, + let InsSDWA = (ins Src0RC32:$vdst, 
Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); @@ -997,12 +1052,12 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { // Write out to vcc or arbitrary SGPR and read in from vcc or // arbitrary SGPR. def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { - // We use VCSrc_32 to exclude literal constants, even though the + // We use VCSrc_b32 to exclude literal constants, even though the // encoding normally allows them since the implicit VCC use means // using one would always violate the constant bus // restriction. SGPRs are still allowed because it should // technically be possible to use VCC again as src0. - let Src0RC32 = VCSrc_32; + let Src0RC32 = VCSrc_b32; let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let Outs32 = (outs DstRC:$vdst); @@ -1015,7 +1070,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // Read in from vcc or arbitrary SGPR def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { - let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. + let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. 
let Asm32 = "$vdst, $src0, $src1, vcc"; let Asm64 = "$vdst, $src0, $src1, $src2"; let Outs32 = (outs DstRC:$vdst); @@ -1052,10 +1107,10 @@ class VOPC_Profile<ValueType vt0, ValueType vt1 = vt0> : VOPProfile <[i1, vt0, v } class VOPC_Class_Profile<ValueType vt> : VOPC_Profile<vt, i32> { - let Ins64 = (ins FPInputMods:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); + let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; - let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC64:$src0, - IntInputMods:$src1_imodifiers, Src1RC64:$src1, + let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0, + Int32InputMods:$src1_imodifiers, Src1RC64:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel"; @@ -1075,26 +1130,26 @@ def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { - field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm); + field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm); field string Asm32 = "$vdst, $src0, $src1, $imm"; field bit HasExt = 0; } def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> { - field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1); + field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1); field string Asm32 = "$vdst, $src0, $imm, $src1"; field bit HasExt = 0; } def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3, - HasModifiers>.ret; - let InsDPP = (ins FPInputMods:$src0_modifiers, Src0RC32:$src0, - FPInputMods:$src1_modifiers, Src1RC32:$src1, + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret; + let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0, + 
FP32InputMods:$src1_modifiers, Src1RC32:$src1, VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC32:$src0, - FPInputMods:$src1_fmodifiers, Src1RC32:$src1, + let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0, + FP32InputMods:$src1_fmodifiers, Src1RC32:$src1, VGPR_32:$src2, // stub argument clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); @@ -1968,11 +2023,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName, SDPatternOperator node = null_frag> : VOP3_Helper < op, opName, (outs P.DstRC.RegClass:$vdst), - (ins FPInputMods:$src0_modifiers, P.Src0RC64:$src0, - FPInputMods:$src1_modifiers, P.Src1RC64:$src1, - FPInputMods:$src2_modifiers, P.Src2RC64:$src2, - clampmod:$clamp, - omod:$omod), + P.Ins64, "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, @@ -2086,7 +2137,7 @@ multiclass MTBUF_Store_Helper <bits<3> op, string opName, op, opName, (outs), (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, - SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset), + SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] >; @@ -2100,7 +2151,7 @@ multiclass MTBUF_Load_Helper <bits<3> op, string opName, op, opName, (outs regClass:$dst), (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc, - i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset), + i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] >; 
@@ -2262,13 +2313,13 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, defm _ADDR64 : MUBUFAtomicAddr64_m < op, name#"_addr64", (outs), (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, slc:$slc), + SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0 >; defm _OFFSET : MUBUFAtomicOffset_m < op, name#"_offset", (outs), - (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, + (ins rc:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0 >; @@ -2276,7 +2327,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 1, idxen = 0 in { defm _OFFEN : MUBUFAtomicOther_m < op, name#"_offen", (outs), - (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0 >; @@ -2285,7 +2336,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 0, idxen = 1 in { defm _IDXEN : MUBUFAtomicOther_m < op, name#"_idxen", (outs), - (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0 >; @@ -2294,7 +2345,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 1, idxen = 1 in { defm _BOTHEN : MUBUFAtomicOther_m < op, name#"_bothen", (outs), - (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc", [], 0 @@ -2310,7 +2361,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, defm 
_RTN_ADDR64 : MUBUFAtomicAddr64_m < op, name#"_rtn_addr64", (outs rc:$vdata), (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, slc:$slc), + SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc", [(set vt:$vdata, (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, @@ -2319,7 +2370,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, defm _RTN_OFFSET : MUBUFAtomicOffset_m < op, name#"_rtn_offset", (outs rc:$vdata), - (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, off, $srsrc, $soffset$offset glc$slc", [(set vt:$vdata, @@ -2330,7 +2381,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 1, idxen = 0 in { defm _RTN_OFFEN : MUBUFAtomicOther_m < op, name#"_rtn_offen", (outs rc:$vdata), - (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset offen$offset glc$slc", [], 1 @@ -2340,7 +2391,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 0, idxen = 1 in { defm _RTN_IDXEN : MUBUFAtomicOther_m < op, name#"_rtn_idxen", (outs rc:$vdata), - (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc", [], 1 @@ -2350,7 +2401,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, let offen = 1, idxen = 1 in { defm _RTN_BOTHEN : MUBUFAtomicOther_m < op, name#"_rtn_bothen", (outs rc:$vdata), - (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, 
offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc", [], 1 @@ -2370,7 +2421,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass, let mayLoad = 1, mayStore = 0 in { let offen = 0, idxen = 0, vaddr = 0 in { defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata), - (ins SReg_128:$srsrc, SCSrc_32:$soffset, + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe", [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, @@ -2381,7 +2432,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass, let offen = 1, idxen = 0 in { defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata), (ins VGPR_32:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, glc:$glc, slc:$slc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe", []>; } @@ -2389,14 +2440,14 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass, let offen = 0, idxen = 1 in { defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata), (ins VGPR_32:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, glc:$glc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>; } let offen = 1, idxen = 1 in { defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata), - (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>; } @@ -2404,7 +2455,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass, let offen = 0, idxen = 0 in { defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata), (ins 
VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$glc$slc$tfe", [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, @@ -2420,7 +2471,7 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass, let mayLoad = 0, mayStore = 1 in { let offen = 0, idxen = 0, vaddr = 0 in { defm _OFFSET : MUBUF_m <op, name#"_offset",(outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe", [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, @@ -2430,7 +2481,7 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass, let offen = 1, idxen = 0 in { defm _OFFEN : MUBUF_m <op, name#"_offen", (outs), (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, glc:$glc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset offen"# "$offset$glc$slc$tfe", []>; @@ -2439,14 +2490,14 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass, let offen = 0, idxen = 1 in { defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs), (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, offset:$offset, glc:$glc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>; } let offen = 1, idxen = 1 in { defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs), - (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset idxen 
offen$offset$glc$slc$tfe", []>; } @@ -2454,7 +2505,7 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass, let offen = 0, idxen = 0 in { defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs), (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset addr64"# diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 63d2239637c..94506f2fcd0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -848,7 +848,7 @@ defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, "v_readlane_b32", (outs SReg_32:$vdst), - (ins VGPR_32:$src0, SCSrc_32:$src1), + (ins VGPR_32:$src0, SCSrc_b32:$src1), "v_readlane_b32 $vdst, $src0, $src1", [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))] >; @@ -857,7 +857,7 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < vop3 <0x002, 0x28a>, "v_writelane_b32", (outs VGPR_32:$vdst), - (ins SReg_32:$src0, SCSrc_32:$src1), + (ins SReg_32:$src0, SCSrc_b32:$src1), "v_writelane_b32 $vdst, $src0, $src1" >; @@ -1179,7 +1179,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { // For use in patterns def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), - (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []> { + (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> { let isPseudo = 1; let isCodeGenOnly = 1; let usesCustomInserter = 1; @@ -1187,7 +1187,7 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. 
-def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_64:$src0)> { +def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)> { let VALU = 1; } } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] @@ -1263,14 +1263,14 @@ def SI_ELSE_BREAK : CFPseudoInstSI < let Uses = [EXEC], Defs = [EXEC,VCC] in { def SI_KILL : PseudoInstSI < - (outs), (ins VSrc_32:$src), + (outs), (ins VSrc_b32:$src), [(AMDGPUkill i32:$src)]> { let isConvergent = 1; let usesCustomInserter = 1; } def SI_KILL_TERMINATOR : SPseudoInstSI < - (outs), (ins VSrc_32:$src)> { + (outs), (ins VSrc_b32:$src)> { let isTerminator = 1; } @@ -1288,7 +1288,7 @@ def SI_PS_LIVE : PseudoInstSI < // s_mov_b32 rather than a copy of another initialized // register. MachineCSE skips copies, and we don't want to have to // fold operands before it runs. -def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_32:$src)> { +def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> { let Defs = [M0]; let usesCustomInserter = 1; let isAsCheapAsAMove = 1; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index b6c5fb9d749..86de0e2954c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -791,14 +791,16 @@ bool SIRegisterInfo::shouldRewriteCopySrc( } bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const { - return OpType == AMDGPU::OPERAND_REG_IMM32; + return OpType == AMDGPU::OPERAND_REG_IMM32_INT || + OpType == AMDGPU::OPERAND_REG_IMM32_FP; } bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { if (opCanUseLiteralConstant(OpType)) return true; - return OpType == AMDGPU::OPERAND_REG_INLINE_C; + return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT || + OpType == AMDGPU::OPERAND_REG_INLINE_C_FP; } // FIXME: Most of these are flexible with HSA and we don't need to reserve them diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td 
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 933266604ba..657bed17a59 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -358,56 +358,59 @@ def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> { // Register operands //===----------------------------------------------------------------------===// -class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> { - let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_REG_IMM32"; -} - -class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> { - let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_REG_INLINE_C"; -} - class RegImmMatcher<string name> : AsmOperandClass { let Name = name; let RenderMethod = "addRegOrImmOperands"; } +multiclass SIRegOperand <string rc, string MatchName, string opType> { + let OperandNamespace = "AMDGPU" in { + + def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> { + let OperandType = opType#"_INT"; + let ParserMatchClass = RegImmMatcher<MatchName#"B32">; + } + + def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> { + let OperandType = opType#"_FP"; + let ParserMatchClass = RegImmMatcher<MatchName#"F32">; + } + + def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> { + let OperandType = opType#"_INT"; + let ParserMatchClass = RegImmMatcher<MatchName#"B64">; + } + + def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> { + let OperandType = opType#"_FP"; + let ParserMatchClass = RegImmMatcher<MatchName#"F64">; + } + } +} + +multiclass RegImmOperand <string rc, string MatchName> + : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM32">; + +multiclass RegInlineOperand <string rc, string MatchName> + : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">; + //===----------------------------------------------------------------------===// // SSrc_* Operands with an SGPR or a 32-bit immediate //===----------------------------------------------------------------------===// -def 
SSrc_32 : RegImmOperand<SReg_32> { - let ParserMatchClass = RegImmMatcher<"SSrc32">; -} - -def SSrc_64 : RegImmOperand<SReg_64> { - let ParserMatchClass = RegImmMatcher<"SSrc64">; -} +defm SSrc : RegImmOperand<"SReg", "SSrc">; //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// -def SCSrc_32 : RegInlineOperand<SReg_32> { - let ParserMatchClass = RegImmMatcher<"SCSrc32">; -} - -def SCSrc_64 : RegInlineOperand<SReg_64> { - let ParserMatchClass = RegImmMatcher<"SCSrc64">; -} +defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate //===----------------------------------------------------------------------===// -def VSrc_32 : RegImmOperand<VS_32> { - let ParserMatchClass = RegImmMatcher<"VSrc32">; -} - -def VSrc_64 : RegImmOperand<VS_64> { - let ParserMatchClass = RegImmMatcher<"VSrc64">; -} +defm VSrc : RegImmOperand<"VS", "VSrc">; //===----------------------------------------------------------------------===// // VSrc_* Operands with an VGPR @@ -424,10 +427,4 @@ def VRegSrc_32 : RegisterOperand<VGPR_32> { // VCSrc_* Operands with an SGPR, VGPR or an inline constant //===----------------------------------------------------------------------===// -def VCSrc_32 : RegInlineOperand<VS_32> { - let ParserMatchClass = RegImmMatcher<"VCSrc32">; -} - -def VCSrc_64 : RegInlineOperand<VS_64> { - let ParserMatchClass = RegImmMatcher<"VCSrc64">; -} +defm VCSrc : RegInlineOperand<"VS", "VCSrc">; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 5a8e2a1c203..7226f2002c4 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -57,24 +57,24 @@ class SOP1_Real<bits<8> op, SOP1_Pseudo ps> : 
} class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < - opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0), + opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0), "$sdst, $src0", pattern >; class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < - opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0), + opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0), "$sdst, $src0", pattern >; // 64-bit input, 32-bit output. class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < - opName, (outs SReg_32:$sdst), (ins SSrc_64:$src0), + opName, (outs SReg_32:$sdst), (ins SSrc_b64:$src0), "$sdst, $src0", pattern >; // 32-bit input, 64-bit output. class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < - opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0), + opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0), "$sdst, $src0", pattern >; @@ -254,22 +254,22 @@ class SOP2_Real<bits<7> op, SOP2_Pseudo ps> : class SOP2_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo < - opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1), "$sdst, $src0, $src1", pattern >; class SOP2_64 <string opName, list<dag> pattern=[]> : SOP2_Pseudo < - opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_64:$src1), + opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1), "$sdst, $src0, $src1", pattern >; class SOP2_64_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo < - opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_32:$src1), + opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b32:$src1), "$sdst, $src0, $src1", pattern >; class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo < - opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1), "$sdst, $src0, $src1", pattern >; @@ -277,23 +277,23 @@ let Defs = [SCC] in { // Carry out goes to SCC let 
isCommutable = 1 in { def S_ADD_U32 : SOP2_32 <"s_add_u32">; def S_ADD_I32 : SOP2_32 <"s_add_i32", - [(set i32:$sdst, (add SSrc_32:$src0, SSrc_32:$src1))] + [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))] >; } // End isCommutable = 1 def S_SUB_U32 : SOP2_32 <"s_sub_u32">; def S_SUB_I32 : SOP2_32 <"s_sub_i32", - [(set i32:$sdst, (sub SSrc_32:$src0, SSrc_32:$src1))] + [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))] >; let Uses = [SCC] in { // Carry in comes from SCC let isCommutable = 1 in { def S_ADDC_U32 : SOP2_32 <"s_addc_u32", - [(set i32:$sdst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; + [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>; } // End isCommutable = 1 def S_SUBB_U32 : SOP2_32 <"s_subb_u32", - [(set i32:$sdst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; + [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>; } // End Uses = [SCC] @@ -614,13 +614,13 @@ class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt, } class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL> - : SOPC_Helper<op, SSrc_32, i32, opName, cond>; + : SOPC_Helper<op, SSrc_b32, i32, opName, cond>; class SOPC_32<bits<7> op, string opName, list<dag> pattern = []> - : SOPC_Base<op, SSrc_32, SSrc_32, opName, pattern>; + : SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>; class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []> - : SOPC_Base<op, SSrc_64, SSrc_32, opName, pattern>; + : SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>; def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 1fac2664ae7..51b56e04e52 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -8,10 +8,13 @@ //===----------------------------------------------------------------------===// #include "AMDGPUBaseInfo.h" #include 
"AMDGPU.h" +#include "SIDefines.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" @@ -200,5 +203,72 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { return Reg; } +bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { + unsigned OpType = Desc.OpInfo[OpNo].OperandType; + + return OpType == AMDGPU::OPERAND_REG_IMM32_INT || + OpType == AMDGPU::OPERAND_REG_IMM32_FP || + OpType == AMDGPU::OPERAND_REG_INLINE_C_INT || + OpType == AMDGPU::OPERAND_REG_INLINE_C_FP; +} + +bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { + unsigned OpType = Desc.OpInfo[OpNo].OperandType; + + return OpType == AMDGPU::OPERAND_REG_IMM32_FP || + OpType == AMDGPU::OPERAND_REG_INLINE_C_FP; +} + +bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { + unsigned OpType = Desc.OpInfo[OpNo].OperandType; + + return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT || + OpType == AMDGPU::OPERAND_REG_INLINE_C_FP; +} + +unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, + unsigned OpNo) { + int RCID = Desc.OpInfo[OpNo].RegClass; + const MCRegisterClass &RC = MRI->getRegClass(RCID); + return RC.getSize(); +} + +bool isInlinableLiteral64(int64_t Literal, bool IsVI) { + if (Literal >= -16 && Literal <= 64) + return true; + + double D = BitsToDouble(Literal); + + if (D == 0.5 || D == -0.5 || + D == 1.0 || D == -1.0 || + D == 2.0 || D == -2.0 || + D == 4.0 || D == -4.0) + return true; + + if (IsVI && Literal == 0x3fc45f306dc9c882) + return true; + + return false; +} + +bool isInlinableLiteral32(int32_t Literal, bool IsVI) { + if (Literal >= -16 && Literal <= 64) + return true; + + float F = BitsToFloat(Literal); + + if (F == 0.5 || F == -0.5 || + F == 1.0 || F == -1.0 || + F == 2.0 
|| F == -2.0 || + F == 4.0 || F == -4.0) + return true; + + if (IsVI && Literal == 0x3e22f983) + return true; + + return false; +} + + } // End namespace AMDGPU } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 744d7fb89e2..1b8b8a07302 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -19,6 +19,8 @@ class FeatureBitset; class Function; class GlobalValue; class MCContext; +class MCInstrDesc; +class MCRegisterInfo; class MCSection; class MCSubtargetInfo; @@ -80,6 +82,23 @@ bool isVI(const MCSubtargetInfo &STI); /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); +/// \brief Can this operand also contain immediate values? +bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); + +/// \brief Is this a floating-point operand? +bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); + +/// \brief Does this operand support only inlinable literals? 
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); + +/// \brief Get size of register operand +unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, + unsigned OpNo); + +/// \brief Is this literal inlinable +bool isInlinableLiteral64(int64_t Literal, bool IsVI); +bool isInlinableLiteral32(int32_t Literal, bool IsVI); + } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index fc88f1fb2c3..3c5a8a963ac 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -90,7 +90,7 @@ let isCommutable = 1 in { class SI2_VI3Alias <string name, Instruction inst> : InstAlias < name#" $dst, $src0, $src1", - (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0) + (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0) >, PredicateControl { let UseInstAsmMatchConverter = 0; let AsmVariantName = AMDGPUAsmVariants.VOP3; diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s new file mode 100644 index 00000000000..a552e6751ba --- /dev/null +++ b/llvm/test/MC/AMDGPU/literals.s @@ -0,0 +1,477 @@ +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI + +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI +// RUN: not 
llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI + +//---------------------------------------------------------------------------// +// fp literal, expected fp operand +//---------------------------------------------------------------------------// + +// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] +v_fract_f64 v[0:1], 0.5 + +// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] +// VI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e] +v_sqrt_f64 v[0:1], -4.0 + +// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e] +// NOVI: error: instruction not supported on this GPU +v_log_clamp_f32 v1, 0.5 + +// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] +v_fract_f64 v[0:1], 0.5 + +// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e] +v_trunc_f32 v0, 0.5 + +// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e] +v_fract_f64 v[0:1], -1.0 + +// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e] +v_trunc_f32 v0, -1.0 + +// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e] +v_fract_f64 v[0:1], 4.0 + +// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e] +v_trunc_f32 v0, 4.0 + +// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 0 ; encoding: 
[0x80,0x64,0x00,0x7e] +v_fract_f64 v[0:1], 0.0 + +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +v_trunc_f32 v0, 0.0 + +// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// VI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f] +v_fract_f64 v[0:1], 1.5 + +// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// VI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f] +v_trunc_f32 v0, 1.5 + +// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] +// VI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0] +v_fract_f64 v[0:1], -3.1415 + +// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// VI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0] +v_trunc_f32 v0, -3.1415 + +// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// VI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44] +v_fract_f64 v[0:1], 100000000000000000000000.0 + +// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] +// VI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65] +v_trunc_f32 v0, 100000000000000000000000.0 + +// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] +// VI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41] +v_fract_f64 v[0:1], 10000000.0 + +// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] +// VI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b] +v_trunc_f32 v0, 
10000000.0 + +// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] +// VI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47] +v_fract_f64 v[0:1], 3.402823e+38 + +// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// VI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +v_trunc_f32 v0, 3.402823e+38 + +// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] +// VI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38] +v_fract_f64 v[0:1], 2.3509886e-38 + +// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] +// VI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00] +v_trunc_f32 v0, 2.3509886e-38 + +// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] +// VI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31] +v_fract_f64 v[0:1], 2.3509886e-70 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_trunc_f32 v0, 2.3509886e-70 + +//---------------------------------------------------------------------------// +// fp literal, expected int operand +//---------------------------------------------------------------------------// + +// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 0.5 + +// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26] +v_and_b32_e32 v0, 0.5, v1 + +// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00] 
+v_and_b32_e64 v0, 0.5, v1 + +// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], -1.0 + +// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26] +v_and_b32_e32 v0, -1.0, v1 + +// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00] +v_and_b32_e64 v0, -1.0, v1 + +// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 4.0 + +// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26] +v_and_b32_e32 v0, 4.0, v1 + +// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00] +v_and_b32_e64 v0, 4.0, v1 + +// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 0.0 + +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +v_and_b32_e32 v0, 0.0, v1 + +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +v_and_b32_e64 v0, 0.0, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 1.5 + +// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] +// VI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f] +v_and_b32_e32 v0, 1.5, v1 + +// NOSICI: error: 
invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], -3.1415 + +// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] +// VI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0] +v_and_b32_e32 v0, -3.1415, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 100000000000000000000000.0 + +// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] +// VI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65] +v_and_b32_e32 v0, 100000000000000000000000.0, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 10000000.0 + +// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] +// VI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b] +v_and_b32_e32 v0, 10000000.0, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 3.402823e+38 + +// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] +// VI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f] +v_and_b32_e32 v0, 3.402823e+38, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 2.3509886e-38 + +// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] +// VI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00] +v_and_b32_e32 v0, 2.3509886e-38, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 2.3509886e-70 + +// NOSICI: error: invalid 
operand for instruction +// NOVI: error: invalid operand for instruction +v_and_b32_e32 v0, 2.3509886e-70, v1 + +//---------------------------------------------------------------------------// +// int literal, expected fp operand +//---------------------------------------------------------------------------// + +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, 0 + +// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] +v_fract_f64_e32 v[0:1], 0 + +// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00] +// VI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00] +v_trunc_f32_e64 v0, 0 + +// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00] +// VI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00] +v_fract_f64_e64 v[0:1], 0 + +// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, -13 + +// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e] +v_fract_f64_e32 v[0:1], -13 + +// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0x8d,0x00,0x00,0x20] +// VI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0x8d,0x00,0x00,0x20] +v_trunc_f32_e64 v0, -13 + +// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0x8d,0x00,0x00,0x20] +// VI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0x8d,0x00,0x00,0x20] +v_fract_f64_e64 v[0:1], -13 + +// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, 35 + +// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: 
[0xa3,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e] +v_fract_f64_e32 v[0:1], 35 + +// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00] +// VI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00] +v_trunc_f32_e64 v0, 35 + +// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00] +// VI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00] +v_fract_f64_e64 v[0:1], 35 + +// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] +// VI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00] +v_trunc_f32_e32 v0, 1234 + +// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] +// VI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] +v_fract_f64_e32 v[0:1], 1234 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_trunc_f32_e64 v0, 1234 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_fract_f64_e64 v[0:1], 1234 + +// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// VI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff] +v_trunc_f32_e32 v0, -54321 + +// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// VI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff] +v_fract_f64_e32 v[0:1], -54321 + +// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] +// VI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde] +v_trunc_f32_e32 v0, 0xdeadbeef + +// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] +// VI: 
v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde] +v_fract_f64_e32 v[0:1], 0xdeadbeef + +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, 0xffffffff + +// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] +// VI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff] +v_fract_f64_e32 v[0:1], 0xffffffff + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_trunc_f32_e32 v0, 0x123456789abcdef0 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_fract_f64_e32 v[0:1], 0x123456789abcdef0 + +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// VI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, 0xffffffffffffffff + +// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] +// VI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e] +v_fract_f64_e32 v[0:1], 0xffffffffffffffff + +//---------------------------------------------------------------------------// +// int literal, expected int operand +//---------------------------------------------------------------------------// + +// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 0 + +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +v_and_b32_e32 v0, 0, v1 + +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +v_and_b32_e64 v0, 0, v1 + +// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], -13 ; encoding: 
[0xcd,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], -13 + +// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26] +v_and_b32_e32 v0, -13, v1 + +// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00] +v_and_b32_e64 v0, -13, v1 + +// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 35 + +// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26] +v_and_b32_e32 v0, 35, v1 + +// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00] +// VI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00] +v_and_b32_e64 v0, 35, v1 + +// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00] +// VI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00] +s_mov_b64_e32 s[0:1], 1234 + +// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] +// VI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] +v_and_b32_e32 v0, 1234, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_and_b32_e64 v0, 1234, v1 + +// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// VI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] +s_mov_b64_e32 s[0:1], -54321 + +// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] +// VI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff] +v_and_b32_e32 v0, -54321, v1 + +// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: 
[0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde] +// VI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] +s_mov_b64_e32 s[0:1], 0xdeadbeef + +// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] +// VI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde] +v_and_b32_e32 v0, 0xdeadbeef, v1 + +// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff] +// VI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] +s_mov_b64_e32 s[0:1], 0xffffffff + +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +v_and_b32_e32 v0, 0xffffffff, v1 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +s_mov_b64_e32 s[0:1], 0x123456789abcdef0 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_and_b32_e32 v0, 0x123456789abcdef0, v1 + +// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe] +// VI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 0xffffffffffffffff + +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// VI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +v_and_b32_e32 v0, 0xffffffffffffffff, v1 + +//---------------------------------------------------------------------------// +// 1/(2*PI) +//---------------------------------------------------------------------------// + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_trunc_f32_e32 v0, 0x3fc45f306dc9c882 + +// NOSICI: error: invalid operand for instruction +// VI: v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e] +v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 + +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: 
[0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e] +v_trunc_f32_e32 v0, 0x3e22f983 + +// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// VI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e] +v_fract_f64_e32 v[0:1], 0x3e22f983 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_trunc_f32_e64 v0, 0x3fc45f306dc9c882 + +// NOSICI: error: invalid operand for instruction +// VI: v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] +v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 + +// NOSICI: error: invalid operand for instruction +// VI: v_trunc_f32_e64 v0, 0x3e22f983 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] +v_trunc_f32_e64 v0, 0x3e22f983 + +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +v_fract_f64_e64 v[0:1], 0x3e22f983 + +// NOSICI: error: invalid operand for instruction +// VI: s_mov_b64 s[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x01,0x80,0xbe] +s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335 + +// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] +// VI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xf8,0x02,0x00,0x26] +v_and_b32_e32 v0, 0.159154943091895317852646485335, v1 + +// NOSICI: error: invalid operand for instruction +// VI: v_and_b32_e64 v0, 0x3e22f983, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] +v_and_b32_e64 v0, 0.159154943091895317852646485335, v1 + +// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] +// VI: v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e] +v_fract_f64 v[0:1], 0.159154943091895317852646485335 + +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] 
+// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e] +v_trunc_f32 v0, 0.159154943091895317852646485335
\ No newline at end of file diff --git a/llvm/test/MC/AMDGPU/reg-syntax-extra.s b/llvm/test/MC/AMDGPU/reg-syntax-extra.s index 52892a4e5e2..f4a072930e3 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-extra.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-extra.s @@ -43,7 +43,7 @@ s_mov_b64 [tma_lo,tma_hi], s[2:3] // SICI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x04,0xee,0xbe] // VI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x01,0xee,0xbe] -v_mov_b32 [v1], [v2] +v_mov_b32_e32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] v_rcp_f64 [v1,v2], [v2,v3] diff --git a/llvm/test/MC/AMDGPU/regression/bug28165.s b/llvm/test/MC/AMDGPU/regression/bug28165.s new file mode 100644 index 00000000000..c9e3f5b60e4 --- /dev/null +++ b/llvm/test/MC/AMDGPU/regression/bug28165.s @@ -0,0 +1,11 @@ +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI + +// SICI: v_cmp_eq_f64_e32 vcc, 0.5, v[254:255] ; encoding: [0xf0,0xfc,0x45,0x7c] +// VI: v_cmp_eq_f64_e32 vcc, 0.5, v[254:255] ; encoding: [0xf0,0xfc,0xc5,0x7c] +v_cmp_eq_f64 vcc, 0.5, v[254:255] + +// GCN: v_cvt_f32_f64_e32 v0, 0.5 ; encoding: [0xf0,0x1e,0x00,0x7e] +v_cvt_f32_f64 v0, 0.5 diff --git a/llvm/test/MC/AMDGPU/regression/bug28413.s b/llvm/test/MC/AMDGPU/regression/bug28413.s index d5b2ff90cd9..425d996ef60 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28413.s +++ b/llvm/test/MC/AMDGPU/regression/bug28413.s @@ -19,10 +19,10 @@ v_cmpx_eq_u32 vcc, 3.125, v0 // SICI: v_cmpx_eq_u32_e32 vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0xa4,0x7d,0x00,0x00,0x48,0x40] // VI: 
v_cmpx_eq_u32_e32 vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0xb4,0x7d,0x00,0x00,0x48,0x40] -v_mov_b32 v0, 0.5 +v_mov_b32_e32 v0, 0.5 // GCN: v_mov_b32_e32 v0, 0.5 ; encoding: [0xf0,0x02,0x00,0x7e] -v_mov_b32 v0, 3.125 +v_mov_b32_e32 v0, 3.125 // GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40] v_add_i32 v0, vcc, 0.5, v0 diff --git a/llvm/test/MC/AMDGPU/sop1-err.s b/llvm/test/MC/AMDGPU/sop1-err.s index 4329451a650..65dbdb98ede 100644 --- a/llvm/test/MC/AMDGPU/sop1-err.s +++ b/llvm/test/MC/AMDGPU/sop1-err.s @@ -26,17 +26,6 @@ s_mov_b64 s1, s[0:1] s_mov_b64 s[0:1], s1 // GCN: error: invalid operand for instruction -// Immediate greater than 32-bits -s_mov_b32 s1, 0xfffffffff -// GCN: error: invalid immediate: only 32-bit values are legal - -// Immediate greater than 32-bits -s_mov_b64 s[0:1], 0xfffffffff -// GCN: error: invalid immediate: only 32-bit values are legal - -s_mov_b64 s[0:1], 0x0000000200000000 -// GCN: error: invalid immediate: only 32-bit values are legal - // FIXME: This shoudl probably say failed to parse. 
s_mov_b32 s // GCN: error: not a valid operand diff --git a/llvm/test/MC/AMDGPU/trap.s b/llvm/test/MC/AMDGPU/trap.s index 3695da518ca..52face5f41d 100644 --- a/llvm/test/MC/AMDGPU/trap.s +++ b/llvm/test/MC/AMDGPU/trap.s @@ -62,7 +62,7 @@ s_lshr_b32 ttmp8, ttmp8, 12 // SICI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x90] // VI: s_lshr_b32 ttmp8, ttmp8, 12 ; encoding: [0x78,0x8c,0x78,0x8f] -v_mov_b32 v1, ttmp8 +v_mov_b32_e32 v1, ttmp8 // SICI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] // VI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index 22a4f91afef..e55c05e5c53 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -32,71 +32,71 @@ v_clrexcp_e32 v_nop // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] -v_mov_b32 v1, v2 +v_mov_b32_e32 v1, v2 // GCN: v_readfirstlane_b32 s1, v2 ; encoding: [0x02,0x05,0x02,0x7e] v_readfirstlane_b32 s1, v2 // GCN: v_cvt_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x07,0x02,0x7e] -v_cvt_i32_f64 v1, v[2:3] +v_cvt_i32_f64_e32 v1, v[2:3] // GCN: v_cvt_f64_i32_e32 v[1:2], v2 ; encoding: [0x02,0x09,0x02,0x7e] -v_cvt_f64_i32 v[1:2], v2 +v_cvt_f64_i32_e32 v[1:2], v2 // GCN: v_cvt_f32_i32_e32 v1, v2 ; encoding: [0x02,0x0b,0x02,0x7e] -v_cvt_f32_i32 v1, v2 +v_cvt_f32_i32_e32 v1, v2 // GCN: v_cvt_f32_u32_e32 v1, v2 ; encoding: [0x02,0x0d,0x02,0x7e] -v_cvt_f32_u32 v1, v2 +v_cvt_f32_u32_e32 v1, v2 // GCN: v_cvt_u32_f32_e32 v1, v2 ; encoding: [0x02,0x0f,0x02,0x7e -v_cvt_u32_f32 v1, v2 +v_cvt_u32_f32_e32 v1, v2 // GCN: v_cvt_i32_f32_e32 v1, v2 ; encoding: [0x02,0x11,0x02,0x7e] -v_cvt_i32_f32 v1, v2 +v_cvt_i32_f32_e32 v1, v2 // SICI: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e] // NOVI: error: instruction not supported on this GPU -v_mov_fed_b32 v1, v2 +v_mov_fed_b32_e32 v1, v2 // GCN: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e] -v_cvt_f16_f32 v1, v2 +v_cvt_f16_f32_e32 v1, v2 // 
GCN: v_cvt_f32_f16_e32 v1, v2 ; encoding: [0x02,0x17,0x02,0x7e] -v_cvt_f32_f16 v1, v2 +v_cvt_f32_f16_e32 v1, v2 // GCN: v_cvt_rpi_i32_f32_e32 v1, v2 ; encoding: [0x02,0x19,0x02,0x7e] -v_cvt_rpi_i32_f32 v1, v2 +v_cvt_rpi_i32_f32_e32 v1, v2 // GCN: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e] -v_cvt_flr_i32_f32 v1, v2 +v_cvt_flr_i32_f32_e32 v1, v2 // GCN: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e] v_cvt_off_f32_i4_e32 v1, v2 // GCN: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e] -v_cvt_f32_f64 v1, v[2:3] +v_cvt_f32_f64_e32 v1, v[2:3] // GCN: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e] -v_cvt_f64_f32 v[1:2], v2 +v_cvt_f64_f32_e32 v[1:2], v2 // GCN: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e] -v_cvt_f32_ubyte0 v1, v2 +v_cvt_f32_ubyte0_e32 v1, v2 // GCN: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e] v_cvt_f32_ubyte1_e32 v1, v2 // GCN: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e] -v_cvt_f32_ubyte2 v1, v2 +v_cvt_f32_ubyte2_e32 v1, v2 // GCN: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e] -v_cvt_f32_ubyte3 v1, v2 +v_cvt_f32_ubyte3_e32 v1, v2 // GCN: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e] -v_cvt_u32_f64 v1, v[2:3] +v_cvt_u32_f64_e32 v1, v[2:3] // GCN: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e] -v_cvt_f64_u32 v[1:2], v2 +v_cvt_f64_u32_e32 v[1:2], v2 // NOSI: error: instruction not supported on this GPU // NOSI: v_trunc_f64_e32 v[1:2], v[2:3] @@ -219,19 +219,19 @@ v_cos_f32 v1, v2 // SICI: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e] // VI: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e] -v_not_b32 v1, v2 +v_not_b32_e32 v1, v2 // SICI: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e] // VI: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e] -v_bfrev_b32 v1, v2 +v_bfrev_b32_e32 v1, v2 // SICI: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e] // VI: 
v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e] -v_ffbh_u32 v1, v2 +v_ffbh_u32_e32 v1, v2 // SICI: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e] // VI: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e] -v_ffbl_b32 v1, v2 +v_ffbl_b32_e32 v1, v2 // SICI: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e] // VI: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x5f,0x02,0x7e] @@ -239,19 +239,19 @@ v_ffbh_i32_e32 v1, v2 // SICI: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e] // VI: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x61,0x02,0x7e] -v_frexp_exp_i32_f64 v1, v[2:3] +v_frexp_exp_i32_f64_e32 v1, v[2:3] // SICI: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e] -// VI; v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e] -v_frexp_mant_f64 v[1:2], v[2:3] +// VI: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e] +v_frexp_mant_f64_e32 v[1:2], v[2:3] // SICI: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e] // VI: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e] -v_fract_f64 v[1:2], v[2:3] +v_fract_f64_e32 v[1:2], v[2:3] // SICI: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e] // VI: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e] -v_frexp_exp_i32_f32 v1, v2 +v_frexp_exp_i32_f32_e32 v1, v2 // SICI: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e] // VI: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x69,0x02,0x7e] @@ -259,19 +259,19 @@ v_frexp_mant_f32 v1, v2 // SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e] // VI: v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e] -v_clrexcp +v_clrexcp_e32 // SICI: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e] // VI: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e] -v_movreld_b32 v1, v2 +v_movreld_b32_e32 v1, v2 // SICI: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e] // VI: v_movrels_b32_e32 v1, v2 ; encoding: 
[0x02,0x6f,0x02,0x7e] -v_movrels_b32 v1, v2 +v_movrels_b32_e32 v1, v2 // SICI: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e] // VI: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e] -v_movrelsd_b32 v1, v2 +v_movrelsd_b32_e32 v1, v2 // NOSI: error: instruction not supported on this GPU // NOSI: v_log_legacy_f32 v1, v2 @@ -286,91 +286,91 @@ v_log_legacy_f32 v1, v2 v_exp_legacy_f32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_cvt_f16_u16 v1, v2 +// NOSICI: v_cvt_f16_u16_e32 v1, v2 // VI: v_cvt_f16_u16_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e] -v_cvt_f16_u16 v1, v2 +v_cvt_f16_u16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_cvt_f16_i16 v1, v2 +// NOSICI: v_cvt_f16_i16_e32 v1, v2 // VI: v_cvt_f16_i16_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e] -v_cvt_f16_i16 v1, v2 +v_cvt_f16_i16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_cvt_u16_f16 v1, v2 +// NOSICI: v_cvt_u16_f16_e32 v1, v2 // VI: v_cvt_u16_f16_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e] -v_cvt_u16_f16 v1, v2 +v_cvt_u16_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_cvt_i16_f16 v1, v2 +// NOSICI: v_cvt_i16_f16_e32 v1, v2 // VI: v_cvt_i16_f16_e32 v1, v2 ; encoding: [0x02,0x79,0x02,0x7e] -v_cvt_i16_f16 v1, v2 +v_cvt_i16_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_rcp_f16 v1, v2 +// NOSICI: v_rcp_f16_e32 v1, v2 // VI: v_rcp_f16_e32 v1, v2 ; encoding: [0x02,0x7b,0x02,0x7e] -v_rcp_f16 v1, v2 +v_rcp_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sqrt_f16 v1, v2 +// NOSICI: v_sqrt_f16_e32 v1, v2 // VI: v_sqrt_f16_e32 v1, v2 ; encoding: [0x02,0x7d,0x02,0x7e] -v_sqrt_f16 v1, v2 +v_sqrt_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_rsq_f16 v1, v2 +// NOSICI: v_rsq_f16_e32 v1, v2 // VI: v_rsq_f16_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e] 
-v_rsq_f16 v1, v2 +v_rsq_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_log_f16 v1, v2 +// NOSICI: v_log_f16_e32 v1, v2 // VI: v_log_f16_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e] -v_log_f16 v1, v2 +v_log_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_exp_f16 v1, v2 +// NOSICI: v_exp_f16_e32 v1, v2 // VI: v_exp_f16_e32 v1, v2 ; encoding: [0x02,0x83,0x02,0x7e] -v_exp_f16 v1, v2 +v_exp_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_frexp_mant_f16 v1, v2 +// NOSICI: v_frexp_mant_f16_e32 v1, v2 // VI: v_frexp_mant_f16_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e] -v_frexp_mant_f16 v1, v2 +v_frexp_mant_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_frexp_exp_i16_f16 v1, v2 +// NOSICI: v_frexp_exp_i16_f16_e32 v1, v2 // VI: v_frexp_exp_i16_f16_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e] -v_frexp_exp_i16_f16 v1, v2 +v_frexp_exp_i16_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_floor_f16 v1, v2 +// NOSICI: v_floor_f16_e32 v1, v2 // VI: v_floor_f16_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e] -v_floor_f16 v1, v2 +v_floor_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_ceil_f16 v1, v2 +// NOSICI: v_ceil_f16_e32 v1, v2 // VI: v_ceil_f16_e32 v1, v2 ; encoding: [0x02,0x8b,0x02,0x7e] -v_ceil_f16 v1, v2 +v_ceil_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_trunc_f16 v1, v2 +// NOSICI: v_trunc_f16_e32 v1, v2 // VI: v_trunc_f16_e32 v1, v2 ; encoding: [0x02,0x8d,0x02,0x7e] -v_trunc_f16 v1, v2 +v_trunc_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_rndne_f16 v1, v2 +// NOSICI: v_rndne_f16_e32 v1, v2 // VI: v_rndne_f16_e32 v1, v2 ; encoding: [0x02,0x8f,0x02,0x7e] -v_rndne_f16 v1, v2 +v_rndne_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_fract_f16 
v1, v2 +// NOSICI: v_fract_f16_e32 v1, v2 // VI: v_fract_f16_e32 v1, v2 ; encoding: [0x02,0x91,0x02,0x7e] -v_fract_f16 v1, v2 +v_fract_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sin_f16 v1, v2 +// NOSICI: v_sin_f16_e32 v1, v2 // VI: v_sin_f16_e32 v1, v2 ; encoding: [0x02,0x93,0x02,0x7e] -v_sin_f16 v1, v2 +v_sin_f16_e32 v1, v2 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_cos_f16 v1, v2 +// NOSICI: v_cos_f16_e32 v1, v2 // VI: v_cos_f16_e32 v1, v2 ; encoding: [0x02,0x95,0x02,0x7e] -v_cos_f16 v1, v2 +v_cos_f16_e32 v1, v2 diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index b4a19fee290..3fa11dd8417 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -48,27 +48,27 @@ v_mul_i32_i24_e64 v1, v2, v3 // src0 inline // SICI: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] -v_mul_i32_i24 v1, 3, v3 +v_mul_i32_i24_e32 v1, 3, v3 // src0 negative inline // SICI: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] -v_mul_i32_i24 v1, -3, v3 +v_mul_i32_i24_e32 v1, -3, v3 // src1 inline // SICI: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] -v_mul_i32_i24 v1, v2, 3 +v_mul_i32_i24_e64 v1, v2, 3 // src1 negative inline // SICI: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] -v_mul_i32_i24 v1, v2, -3 +v_mul_i32_i24_e64 v1, v2, -3 // src0 literal // SICI: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00] -v_mul_i32_i24 v1, 100, v3 +v_mul_i32_i24_e32 v1, 100, v3 // src1 negative literal // SICI: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff] -v_mul_i32_i24 v1, -100, v3 +v_mul_i32_i24_e32 v1, -100, v3 //===----------------------------------------------------------------------===// // Checks for legal operands @@ -76,31 +76,31 @@ v_mul_i32_i24 v1, -100, v3 // src0 sgpr // SICI: v_mul_i32_i24_e32 v1, s2, v3 ; 
encoding: [0x02,0x06,0x02,0x12] -v_mul_i32_i24 v1, s2, v3 +v_mul_i32_i24_e32 v1, s2, v3 // src1 sgpr // SICI: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00] -v_mul_i32_i24 v1, v2, s3 +v_mul_i32_i24_e64 v1, v2, s3 // src0, src1 same sgpr // SICI: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00] -v_mul_i32_i24 v1, s2, s2 +v_mul_i32_i24_e64 v1, s2, s2 // src0 sgpr, src1 inline // SICI: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00] -v_mul_i32_i24 v1, s2, 3 +v_mul_i32_i24_e64 v1, s2, 3 // src0 inline src1 sgpr // SICI: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] -v_mul_i32_i24 v1, 3, s3 +v_mul_i32_i24_e64 v1, 3, s3 // SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a] // VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32] -v_add_i32 v0, vcc, 0.5, v0 +v_add_i32_e32 v0, vcc, 0.5, v0 // SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40] // VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40] -v_add_i32 v0, vcc, 3.125, v0 +v_add_i32_e32 v0, vcc, 3.125, v0 //===----------------------------------------------------------------------===// // Instructions @@ -147,19 +147,19 @@ v_mul_f32 v1, v2, v3 // SICI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] // VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] -v_mul_i32_i24 v1, v2, v3 +v_mul_i32_i24_e32 v1, v2, v3 // SICI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] // VI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] -v_mul_hi_i32_i24 v1, v2, v3 +v_mul_hi_i32_i24_e32 v1, v2, v3 // SICI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] // VI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] -v_mul_u32_u24 v1, v2, v3 +v_mul_u32_u24_e32 v1, v2, v3 // SICI: 
v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] // VI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] -v_mul_hi_u32_u24 v1, v2, v3 +v_mul_hi_u32_u24_e32 v1, v2, v3 // SICI: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] // NOVI: error: instruction not supported on this GPU @@ -181,37 +181,37 @@ v_max_f32 v1, v2 v3 // SICI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] // VI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] -v_min_i32 v1, v2, v3 +v_min_i32_e32 v1, v2, v3 // SICI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] // VI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] -v_max_i32 v1, v2, v3 +v_max_i32_e32 v1, v2, v3 // SICI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] // VI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] -v_min_u32 v1, v2, v3 +v_min_u32_e32 v1, v2, v3 // SICI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] // VI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] -v_max_u32 v1, v2, v3 +v_max_u32_e32 v1, v2, v3 // SICI: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] // NOVI: error: instruction not supported on this GPU -// NOVI: v_lshr_b32 v1, v2, v3 -v_lshr_b32 v1, v2, v3 +// NOVI: v_lshr_b32_e32 v1, v2, v3 +v_lshr_b32_e32 v1, v2, v3 // SICI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] // VI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] -v_lshrrev_b32 v1, v2, v3 +v_lshrrev_b32_e32 v1, v2, v3 // SICI: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] // NOVI: error: instruction not supported on this GPU -// NOVI: v_ashr_i32 v1, v2, v3 -v_ashr_i32 v1, v2, v3 +// NOVI: v_ashr_i32_e32 v1, v2, v3 +v_ashr_i32_e32 v1, v2, v3 // SICI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] // VI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] -v_ashrrev_i32 v1, v2, v3 +v_ashrrev_i32_e32 v1, v2, v3 // SICI: 
v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] // NOVI: error: instruction not supported on this GPU @@ -220,51 +220,51 @@ v_lshl_b32_e32 v1, v2, v3 // SICI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] // VI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] -v_lshlrev_b32 v1, v2, v3 +v_lshlrev_b32_e32 v1, v2, v3 // SICI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] // VI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] -v_and_b32 v1, v2, v3 +v_and_b32_e32 v1, v2, v3 // SICI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] // VI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] -v_or_b32 v1, v2, v3 +v_or_b32_e32 v1, v2, v3 // SICI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] // VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] -v_xor_b32 v1, v2, v3 +v_xor_b32_e32 v1, v2, v3 -// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +// SICI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00] // VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] -v_bfm_b32 v1, v2, v3 +v_bfm_b32_e64 v1, v2, v3 // SICI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] // VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] -v_mac_f32 v1, v2, v3 +v_mac_f32_e32 v1, v2, v3 // SICI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] // VI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42] -v_madmk_f32 v1, v2, 64.0, v3 +v_madmk_f32_e32 v1, v2, 64.0, v3 // SICI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42] // VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] -v_madak_f32 v1, v2, v3, 64.0 +v_madak_f32_e32 v1, v2, v3, 64.0 -// SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +// SICI: 
v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] // VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] -v_bcnt_u32_b32 v1, v2, v3 +v_bcnt_u32_b32_e64 v1, v2, v3 -// SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +// SICI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x46,0xd2,0x02,0x07,0x02,0x00] // VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] -v_mbcnt_lo_u32_b32 v1, v2, v3 +v_mbcnt_lo_u32_b32_e64 v1, v2, v3 -// SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] +// SICI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00] // VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] -v_mbcnt_hi_u32_b32 v1, v2, v3 +v_mbcnt_hi_u32_b32_e64 v1, v2, v3 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] // VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] -v_add_i32 v1, vcc, v2, v3 +v_add_i32_e32 v1, vcc, v2, v3 // SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00] // VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00] @@ -387,38 +387,38 @@ v_cvt_pknorm_u16_f32 v1, v2, v3 // VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] v_cvt_pkrtz_f16_f32 v1, v2, v3 -// SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +// SICI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x60,0xd2,0x02,0x07,0x02,0x00] // VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] -v_cvt_pk_u16_u32 v1, v2, v3 +v_cvt_pk_u16_u32_e64 v1, v2, v3 -// SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +// SICI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x62,0xd2,0x02,0x07,0x02,0x00] // VI: 
v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] -v_cvt_pk_i16_i32 v1, v2, v3 +v_cvt_pk_i16_i32_e64 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_add_f16 v1, v2, v3 +// NOSICI: v_add_f16_e32 v1, v2, v3 // VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] -v_add_f16 v1, v2, v3 +v_add_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sub_f16 v1, v2, v3 +// NOSICI: v_sub_f16_e32 v1, v2, v3 // VI: v_sub_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x40] -v_sub_f16 v1, v2, v3 +v_sub_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_subrev_f16 v1, v2, v3 +// NOSICI: v_subrev_f16_e32 v1, v2, v3 // VI: v_subrev_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x42] -v_subrev_f16 v1, v2, v3 +v_subrev_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_mul_f16 v1, v2, v3 +// NOSICI: v_mul_f16_e32 v1, v2, v3 // VI: v_mul_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] -v_mul_f16 v1, v2, v3 +v_mul_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_mac_f16 v1, v2, v3 +// NOSICI: v_mac_f16_e32 v1, v2, v3 // VI: v_mac_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] -v_mac_f16 v1, v2, v3 +v_mac_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU // NOSICI: v_madmk_f16 v1, v2, 64.0, v3 @@ -431,71 +431,71 @@ v_madmk_f16 v1, v2, 64.0, v3 v_madak_f16 v1, v2, v3, 64.0 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_add_u16 v1, v2, v3 +// NOSICI: v_add_u16_e32 v1, v2, v3 // VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] -v_add_u16 v1, v2, v3 +v_add_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_sub_u16 v1, v2, v3 +// NOSICI: v_sub_u16_e32 v1, v2, v3 // VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] -v_sub_u16 v1, v2, v3 
+v_sub_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_subrev_u16 v1, v2, v3 +// NOSICI: v_subrev_u16_e32 v1, v2, v3 // VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] -v_subrev_u16 v1, v2, v3 +v_subrev_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_mul_lo_u16 v1, v2, v3 +// NOSICI: v_mul_lo_u16_e32 v1, v2, v3 // VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] -v_mul_lo_u16 v1, v2, v3 +v_mul_lo_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_lshlrev_b16 v1, v2, v3 +// NOSICI: v_lshlrev_b16_e32 v1, v2, v3 // VI: v_lshlrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] -v_lshlrev_b16 v1, v2, v3 +v_lshlrev_b16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_lshrrev_b16 v1, v2, v3 +// NOSICI: v_lshrrev_b16_e32 v1, v2, v3 // VI: v_lshrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] -v_lshrrev_b16 v1, v2, v3 +v_lshrrev_b16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_ashrrev_b16 v1, v2, v3 +// NOSICI: v_ashrrev_b16_e32 v1, v2, v3 // VI: v_ashrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] -v_ashrrev_b16 v1, v2, v3 +v_ashrrev_b16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_f16 v1, v2, v3 +// NOSICI: v_max_f16_e32 v1, v2, v3 // VI: v_max_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] -v_max_f16 v1, v2, v3 +v_max_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_f16 v1, v2, v3 +// NOSICI: v_min_f16_e32 v1, v2, v3 // VI: v_min_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] -v_min_f16 v1, v2, v3 +v_min_f16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_u16 v1, v2, v3 +// NOSICI: v_max_u16_e32 v1, v2, v3 // VI: v_max_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] -v_max_u16 
v1, v2, v3 +v_max_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_max_i16 v1, v2, v3 +// NOSICI: v_max_i16_e32 v1, v2, v3 // VI: v_max_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] -v_max_i16 v1, v2, v3 +v_max_i16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_u16 v1, v2, v3 +// NOSICI: v_min_u16_e32 v1, v2, v3 // VI: v_min_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] -v_min_u16 v1, v2, v3 +v_min_u16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_min_i16 v1, v2, v3 +// NOSICI: v_min_i16_e32 v1, v2, v3 // VI: v_min_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x64] -v_min_i16 v1, v2, v3 +v_min_i16_e32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_ldexp_f16 v1, v2, v3 +// NOSICI: v_ldexp_f16_e32 v1, v2, v3 // VI: v_ldexp_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x66] -v_ldexp_f16 v1, v2, v3 +v_ldexp_f16_e32 v1, v2, v3 diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s new file mode 100644 index 00000000000..08cfa7832a7 --- /dev/null +++ b/llvm/test/MC/AMDGPU/vop3-convert.s @@ -0,0 +1,411 @@ +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI + +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI + +v_mov_b32 [v1], [v2] +// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] + +v_mov_b32 v0, 0.5 +// GCN: v_mov_b32_e32 v0, 0.5 ; encoding: [0xf0,0x02,0x00,0x7e] + +v_mov_b32_e32 v0, 3.125 +// GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40] + +v_mov_b32 v1, ttmp8 +// SICI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] +// VI: v_mov_b32_e32 v1, ttmp8 ; encoding: [0x78,0x02,0x02,0x7e] + +// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] +v_mov_b32 v1, v2 + +// SICI: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e] +// VI: v_not_b32_e32 v1, v2 ; encoding: [0x02,0x57,0x02,0x7e] +v_not_b32 v1, v2 + +// SICI: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e] +// VI: v_bfrev_b32_e32 v1, v2 ; encoding: [0x02,0x59,0x02,0x7e] +v_bfrev_b32 v1, v2 + +// SICI: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e] +// VI: v_ffbh_u32_e32 v1, v2 ; encoding: [0x02,0x5b,0x02,0x7e] +v_ffbh_u32 v1, v2 + +// SICI: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e] +// VI: v_ffbl_b32_e32 v1, v2 ; encoding: [0x02,0x5d,0x02,0x7e] +v_ffbl_b32 v1, v2 + +// SICI: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e] +// VI: v_ffbh_i32_e32 v1, v2 ; encoding: [0x02,0x5f,0x02,0x7e] +v_ffbh_i32_e32 v1, v2 + +// SICI: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x79,0x02,0x7e] +// VI: v_frexp_exp_i32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x61,0x02,0x7e] +v_frexp_exp_i32_f64 v1, v[2:3] + +// SICI: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7b,0x02,0x7e] +// VI: v_frexp_mant_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x63,0x02,0x7e] +v_frexp_mant_f64 v[1:2], v[2:3] + +// SICI: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x7d,0x02,0x7e] +// VI: v_fract_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x65,0x02,0x7e] +v_fract_f64 v[1:2], v[2:3] + +// SICI: 
v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e] +// VI: v_frexp_exp_i32_f32_e32 v1, v2 ; encoding: [0x02,0x67,0x02,0x7e] +v_frexp_exp_i32_f32 v1, v2 + +// SICI: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e] +// VI: v_frexp_mant_f32_e32 v1, v2 ; encoding: [0x02,0x69,0x02,0x7e] +v_frexp_mant_f32 v1, v2 + +// SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e] +// VI: v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e] +v_clrexcp + +// SICI: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e] +// VI: v_movreld_b32_e32 v1, v2 ; encoding: [0x02,0x6d,0x02,0x7e] +v_movreld_b32 v1, v2 + +// SICI: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e] +// VI: v_movrels_b32_e32 v1, v2 ; encoding: [0x02,0x6f,0x02,0x7e] +v_movrels_b32 v1, v2 + +// SICI: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e] +// VI: v_movrelsd_b32_e32 v1, v2 ; encoding: [0x02,0x71,0x02,0x7e] +v_movrelsd_b32 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_cvt_f16_u16 v1, v2 +// VI: v_cvt_f16_u16_e32 v1, v2 ; encoding: [0x02,0x73,0x02,0x7e] +v_cvt_f16_u16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_cvt_f16_i16 v1, v2 +// VI: v_cvt_f16_i16_e32 v1, v2 ; encoding: [0x02,0x75,0x02,0x7e] +v_cvt_f16_i16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_cvt_u16_f16 v1, v2 +// VI: v_cvt_u16_f16_e32 v1, v2 ; encoding: [0x02,0x77,0x02,0x7e] +v_cvt_u16_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_cvt_i16_f16 v1, v2 +// VI: v_cvt_i16_f16_e32 v1, v2 ; encoding: [0x02,0x79,0x02,0x7e] +v_cvt_i16_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_rcp_f16 v1, v2 +// VI: v_rcp_f16_e32 v1, v2 ; encoding: [0x02,0x7b,0x02,0x7e] +v_rcp_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sqrt_f16 v1, v2 +// VI: v_sqrt_f16_e32 v1, v2 ; encoding: [0x02,0x7d,0x02,0x7e] +v_sqrt_f16 v1, 
v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_rsq_f16 v1, v2 +// VI: v_rsq_f16_e32 v1, v2 ; encoding: [0x02,0x7f,0x02,0x7e] +v_rsq_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_log_f16 v1, v2 +// VI: v_log_f16_e32 v1, v2 ; encoding: [0x02,0x81,0x02,0x7e] +v_log_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_exp_f16 v1, v2 +// VI: v_exp_f16_e32 v1, v2 ; encoding: [0x02,0x83,0x02,0x7e] +v_exp_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_frexp_mant_f16 v1, v2 +// VI: v_frexp_mant_f16_e32 v1, v2 ; encoding: [0x02,0x85,0x02,0x7e] +v_frexp_mant_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_frexp_exp_i16_f16 v1, v2 +// VI: v_frexp_exp_i16_f16_e32 v1, v2 ; encoding: [0x02,0x87,0x02,0x7e] +v_frexp_exp_i16_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_floor_f16 v1, v2 +// VI: v_floor_f16_e32 v1, v2 ; encoding: [0x02,0x89,0x02,0x7e] +v_floor_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_ceil_f16 v1, v2 +// VI: v_ceil_f16_e32 v1, v2 ; encoding: [0x02,0x8b,0x02,0x7e] +v_ceil_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_trunc_f16 v1, v2 +// VI: v_trunc_f16_e32 v1, v2 ; encoding: [0x02,0x8d,0x02,0x7e] +v_trunc_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_rndne_f16 v1, v2 +// VI: v_rndne_f16_e32 v1, v2 ; encoding: [0x02,0x8f,0x02,0x7e] +v_rndne_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_fract_f16 v1, v2 +// VI: v_fract_f16_e32 v1, v2 ; encoding: [0x02,0x91,0x02,0x7e] +v_fract_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sin_f16 v1, v2 +// VI: v_sin_f16_e32 v1, v2 ; encoding: [0x02,0x93,0x02,0x7e] +v_sin_f16 v1, v2 + +// NOSICI: error: instruction not supported on this GPU 
+// NOSICI: v_cos_f16 v1, v2 +// VI: v_cos_f16_e32 v1, v2 ; encoding: [0x02,0x95,0x02,0x7e] +v_cos_f16 v1, v2 + +// src0 inline +// SICI: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] +v_mul_i32_i24 v1, 3, v3 + +// src0 negative inline +// SICI: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] +v_mul_i32_i24 v1, -3, v3 + +// src1 inline +// SICI: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] +v_mul_i32_i24 v1, v2, 3 + +// src1 negative inline +// SICI: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] +v_mul_i32_i24 v1, v2, -3 + +// GCN: v_cvt_flr_i32_f32_e32 v1, v2 ; encoding: [0x02,0x1b,0x02,0x7e] +v_cvt_flr_i32_f32 v1, v2 + +// GCN: v_cvt_off_f32_i4_e32 v1, v2 ; encoding: [0x02,0x1d,0x02,0x7e] +v_cvt_off_f32_i4_e32 v1, v2 + +// GCN: v_cvt_f32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x1f,0x02,0x7e] +v_cvt_f32_f64 v1, v[2:3] + +// GCN: v_cvt_f64_f32_e32 v[1:2], v2 ; encoding: [0x02,0x21,0x02,0x7e] +v_cvt_f64_f32 v[1:2], v2 + +// GCN: v_cvt_f32_ubyte0_e32 v1, v2 ; encoding: [0x02,0x23,0x02,0x7e] +v_cvt_f32_ubyte0 v1, v2 + +// GCN: v_cvt_f32_ubyte1_e32 v1, v2 ; encoding: [0x02,0x25,0x02,0x7e] +v_cvt_f32_ubyte1_e32 v1, v2 + +// GCN: v_cvt_f32_ubyte2_e32 v1, v2 ; encoding: [0x02,0x27,0x02,0x7e] +v_cvt_f32_ubyte2 v1, v2 + +// GCN: v_cvt_f32_ubyte3_e32 v1, v2 ; encoding: [0x02,0x29,0x02,0x7e] +v_cvt_f32_ubyte3 v1, v2 + +// GCN: v_cvt_u32_f64_e32 v1, v[2:3] ; encoding: [0x02,0x2b,0x02,0x7e] +v_cvt_u32_f64 v1, v[2:3] + +// GCN: v_cvt_f64_u32_e32 v[1:2], v2 ; encoding: [0x02,0x2d,0x02,0x7e] +v_cvt_f64_u32 v[1:2], v2 + +// SICI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] +v_mul_i32_i24 v1, v2, v3 + +// SICI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] +// VI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] +v_mul_hi_i32_i24 v1, v2, v3 + +// SICI: 
v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] +// VI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] +v_mul_u32_u24 v1, v2, v3 + +// SICI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] +// VI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +v_mul_hi_u32_u24 v1, v2, v3 + +// SICI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] +// VI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] +v_min_i32 v1, v2, v3 + +// SICI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] +// VI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] +v_max_i32 v1, v2, v3 + +// SICI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] +// VI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] +v_min_u32 v1, v2, v3 + +// SICI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] +// VI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] +v_max_u32 v1, v2, v3 + +// SICI: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_lshr_b32 v1, v2, v3 +v_lshr_b32 v1, v2, v3 + +// SICI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] +// VI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] +v_lshrrev_b32 v1, v2, v3 + +// SICI: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_ashr_i32 v1, v2, v3 +v_ashr_i32 v1, v2, v3 + +// SICI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] +// VI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] +v_ashrrev_i32 v1, v2, v3 + +// SICI: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_lshl_b32_e32 v1, v2, v3 +v_lshl_b32_e32 v1, v2, v3 + +// SICI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +// VI: v_lshlrev_b32_e32 v1, v2, v3 ; 
encoding: [0x02,0x07,0x02,0x24] +v_lshlrev_b32 v1, v2, v3 + +// SICI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] +// VI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] +v_and_b32 v1, v2, v3 + +// SICI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] +// VI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] +v_or_b32 v1, v2, v3 + +// SICI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] +// VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] +v_xor_b32 v1, v2, v3 + +// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +v_bfm_b32 v1, v2, v3 + +// SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] +v_bcnt_u32_b32 v1, v2, v3 + +// SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] +v_mbcnt_lo_u32_b32 v1, v2, v3 + +// SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] +// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] +v_mbcnt_hi_u32_b32 v1, v2, v3 + +// SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_u16_u32 v1, v2, v3 + +// SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_i16_i32 v1, v2, v3 + +// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +v_bfm_b32 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_add_f16 v1, v2, v3 
+// VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] +v_add_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sub_f16 v1, v2, v3 +// VI: v_sub_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x40] +v_sub_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_subrev_f16 v1, v2, v3 +// VI: v_subrev_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x42] +v_subrev_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mul_f16 v1, v2, v3 +// VI: v_mul_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +v_mul_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mac_f16 v1, v2, v3 +// VI: v_mac_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +v_mac_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_add_u16 v1, v2, v3 +// VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] +v_add_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sub_u16 v1, v2, v3 +// VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +v_sub_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_subrev_u16 v1, v2, v3 +// VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] +v_subrev_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mul_lo_u16 v1, v2, v3 +// VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] +v_mul_lo_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_lshlrev_b16 v1, v2, v3 +// VI: v_lshlrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] +v_lshlrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_lshrrev_b16 v1, v2, v3 +// VI: v_lshrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] +v_lshrrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on 
this GPU +// NOSICI: v_ashrrev_b16 v1, v2, v3 +// VI: v_ashrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] +v_ashrrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_f16 v1, v2, v3 +// VI: v_max_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] +v_max_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_f16 v1, v2, v3 +// VI: v_min_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] +v_min_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_u16 v1, v2, v3 +// VI: v_max_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] +v_max_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_i16 v1, v2, v3 +// VI: v_max_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +v_max_i16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_u16 v1, v2, v3 +// VI: v_min_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +v_min_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_i16 v1, v2, v3 +// VI: v_min_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x64] +v_min_i16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_ldexp_f16 v1, v2, v3 +// VI: v_ldexp_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x66] +v_ldexp_f16 v1, v2, v3 |