summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp 11
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 23
-rw-r--r-- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp 98
-rw-r--r-- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h 10
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h 12
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp 30
-rw-r--r-- llvm/lib/Target/AMDGPU/SIDefines.h 6
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 71
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td 148
-rw-r--r-- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp 35
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP1Instructions.td 16
-rw-r--r-- llvm/lib/Target/AMDGPU/VOP2Instructions.td 33
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPCInstructions.td 28
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPInstructions.td 80
15 files changed, 323 insertions, 281 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index a01f5d37c7c..69dc5298617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -66,7 +66,9 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
- VI = 1
+ VI = 1,
+ SDWA = 2,
+ SDWA9 = 3
};
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
@@ -101,7 +103,12 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
+ SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+ if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
+ Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
+ : SIEncodingFamily::SDWA;
+
+ int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 54df9728db9..1e4fa521f01 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -260,6 +260,8 @@ public:
return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
}
+ bool isSDWARegKind() const;
+
bool isImmTy(ImmTy ImmT) const {
return isImm() && Imm.Type == ImmT;
}
@@ -1244,6 +1246,15 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
+// Predicate used to match SDWA register operands. The answer depends on what
+// the owning parser reports: when it is VI the operand must satisfy isVReg(),
+// when it is GFX9 it must satisfy isRegKind(), and otherwise nothing matches.
+bool AMDGPUOperand::isSDWARegKind() const {
+  if (AsmParser->isVI())
+    return isVReg();
+  if (AsmParser->isGFX9())
+    return isRegKind();
+  return false;
+}
+
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
@@ -4490,12 +4501,11 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
- // V_NOP_sdwa_vi has no optional sdwa arguments
+ // V_NOP_sdwa_vi/gfx9 has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- if (isGFX9() &&
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
@@ -4505,8 +4515,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
case SIInstrFlags::VOP2:
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- if (isGFX9() &&
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
@@ -4516,9 +4525,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
break;
case SIInstrFlags::VOPC:
- if (isVI()) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- }
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 88c92b9582f..20db4e51397 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -49,6 +49,17 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) {
MCDisassembler::SoftFail;
}
+// Inserts Op into MI at the position of the operand named NameIdx for MI's
+// opcode. Returns that position, or -1 (leaving MI untouched) when the opcode
+// has no operand with that name.
+static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
+                                uint16_t NameIdx) {
+  const int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
+  if (OpIdx == -1)
+    return OpIdx;
+
+  MI.insert(std::next(MI.begin(), OpIdx), Op);
+  return OpIdx;
+}
+
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
@@ -106,12 +117,12 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
-#define DECODE_SDWA9(DecName) \
-DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName)
+#define DECODE_SDWA(DecName) \
+DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
-DECODE_SDWA9(Src32)
-DECODE_SDWA9(Src16)
-DECODE_SDWA9(VopcDst)
+DECODE_SDWA(Src32)
+DECODE_SDWA(Src16)
+DECODE_SDWA(VopcDst)
#include "AMDGPUGenDisassemblerTables.inc"
@@ -149,6 +160,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &WS,
raw_ostream &CS) const {
CommentStream = &CS;
+ bool IsSDWA = false;
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
@@ -170,10 +182,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res) break;
Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
- if (Res) break;
+ if (Res) { IsSDWA = true; break; }
Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
- if (Res) break;
+ if (Res) { IsSDWA = true; break; }
}
// Reinitialize Bytes as DPP64 could have eaten too much
@@ -200,17 +212,36 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) {
// Insert dummy unused src2_modifiers.
- int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::src2_modifiers);
- auto I = MI.begin();
- std::advance(I, Src2ModIdx);
- MI.insert(I, MCOperand::createImm(0));
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src2_modifiers);
}
+ if (Res && IsSDWA)
+ Res = convertSDWAInst(MI);
+
Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0;
return Res;
}
+// Adds the operands that a decoded SDWA instruction is missing, so that the
+// MCInst matches the operand list of its opcode. Which operand is added
+// depends on the subtarget features and on whether the opcode has an sdst
+// operand (the VOPC case). Always returns Success.
+DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
+  const auto &Features = STI.getFeatureBits();
+  const unsigned Opc = MI.getOpcode();
+
+  if (Features[AMDGPU::FeatureGFX9]) {
+    const bool HasSDst =
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst) != -1;
+    // VOPC - insert clamp
+    if (HasSDst)
+      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
+    return MCDisassembler::Success;
+  }
+
+  if (Features[AMDGPU::FeatureVolcanicIslands]) {
+    const bool HasSDst =
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst) != -1;
+    if (HasSDst) {
+      // VOPC - insert VCC register as sdst
+      insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC),
+                           AMDGPU::OpName::sdst);
+    } else {
+      // VOP1/2 - insert omod if present in instruction
+      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
+    }
+  }
+
+  return MCDisassembler::Success;
+}
+
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -592,36 +623,43 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
-MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width,
- unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
+ unsigned Val) const {
using namespace AMDGPU::SDWA;
- if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
- Val <= SDWA9EncValues::SRC_VGPR_MAX) {
- return createRegOperand(getVgprClassId(Width),
- Val - SDWA9EncValues::SRC_VGPR_MIN);
- }
- if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
- Val <= SDWA9EncValues::SRC_SGPR_MAX) {
- return createSRegOperand(getSgprClassId(Width),
- Val - SDWA9EncValues::SRC_SGPR_MIN);
- }
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
+ if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_VGPR_MAX) {
+ return createRegOperand(getVgprClassId(Width),
+ Val - SDWA9EncValues::SRC_VGPR_MIN);
+ }
+ if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
+ Val <= SDWA9EncValues::SRC_SGPR_MAX) {
+ return createSRegOperand(getSgprClassId(Width),
+ Val - SDWA9EncValues::SRC_SGPR_MIN);
+ }
- return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
+ return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN);
+ } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ return createRegOperand(getVgprClassId(Width), Val);
+ }
+ llvm_unreachable("unsupported target");
}
-MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const {
- return decodeSDWA9Src(OPW16, Val);
+MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
+ return decodeSDWASrc(OPW16, Val);
}
-MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const {
- return decodeSDWA9Src(OPW32, Val);
+MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
+ return decodeSDWASrc(OPW32, Val);
}
-MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
using namespace AMDGPU::SDWA;
+ assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
+ "SDWAVopcDst should be present only on GFX9");
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
if (Val > AMDGPU::EncValues::SGPR_MAX) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 5fa3cf1a223..3d71db909e2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -65,6 +65,8 @@ public:
uint64_t Inst,
uint64_t Address) const;
+ DecodeStatus convertSDWAInst(MCInst &MI) const;
+
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
@@ -105,10 +107,10 @@ public:
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
- MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const;
- MCOperand decodeSDWA9Src16(unsigned Val) const;
- MCOperand decodeSDWA9Src32(unsigned Val) const;
- MCOperand decodeSDWA9VopcDst(unsigned Val) const;
+ MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSDWASrc16(unsigned Val) const;
+ MCOperand decodeSDWASrc32(unsigned Val) const;
+ MCOperand decodeSDWAVopcDst(unsigned Val) const;
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index a856b17a228..1b062064ace 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -52,15 +52,15 @@ public:
return 0;
}
- virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
return 0;
}
- virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
return 0;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index e02acf516c0..ff0801e5d63 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -69,14 +69,14 @@ public:
unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
-
- unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+
+ unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // end anonymous namespace
@@ -328,11 +328,11 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned
-SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
-
+
uint64_t RegEnc = 0;
const MCOperand &MO = MI.getOperand(OpNo);
@@ -347,9 +347,9 @@ SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned
-SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
uint64_t RegEnc = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 5cd90323ff6..3915c0e5bdb 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -118,9 +118,9 @@ namespace AMDGPU {
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,
- // Operand for GFX9 SDWA instructions
- OPERAND_SDWA9_SRC,
- OPERAND_SDWA9_VOPC_DST,
+ // Operand for SDWA instructions
+ OPERAND_SDWA_SRC,
+ OPERAND_SDWA_VOPC_DST,
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1097814e99c..35c1c3ed3f5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2108,7 +2108,9 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
- if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET)
+ if (!MO.isImm() ||
+ OperandType < AMDGPU::OPERAND_SRC_FIRST ||
+ OperandType > AMDGPU::OPERAND_SRC_LAST)
return false;
// MachineOperand provides no way to tell the true operand size, since it only
@@ -2433,8 +2435,73 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Verify SDWA
+ if (isSDWA(MI)) {
+
+ if (!ST.hasSDWA()) {
+ ErrInfo = "SDWA is not supported on this target";
+ return false;
+ }
+
+ int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
+ if ( DstIdx == -1)
+ DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
+
+ const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
+
+ for (int OpIdx: OpIndicies) {
+ if (OpIdx == -1)
+ continue;
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+
+ if (AMDGPU::isVI(ST)) {
+ // Only VGPRs on VI
+ if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
+ ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
+ return false;
+ }
+ } else {
+ // No immediates on GFX9
+ if (!MO.isReg()) {
+ ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
+ return false;
+ }
+ }
+ }
+
+ if (AMDGPU::isVI(ST)) {
+ // No omod allowed on VI
+ const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod != nullptr &&
+ (!OMod->isImm() || OMod->getImm() != 0)) {
+ ErrInfo = "OMod not allowed in SDWA instructions on VI";
+ return false;
+ }
+ }
+
+    // Extra restrictions that apply only when this SDWA instruction maps back
+    // to a VOPC opcode. The two targets differ: VI constrains the destination,
+    // GFX9 constrains the clamp operand.
+    uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
+    if (isVOPC(BasicOpcode)) {
+      if (AMDGPU::isVI(ST) && DstIdx != -1) {
+        // Only vcc allowed as dst on VI for VOPC
+        const MachineOperand &Dst = MI.getOperand(DstIdx);
+        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
+          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
+          return false;
+        }
+      } else if (AMDGPU::isGFX9(ST)) {
+        // No clamp allowed on GFX9 for VOPC
+        const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+        if (Clamp != nullptr &&
+            (!Clamp->isImm() || Clamp->getImm() != 0)) {
+          // This branch is only reached on GFX9, so the diagnostic must name
+          // GFX9 rather than VI.
+          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9";
+          return false;
+        }
+      }
+    }
+ }
+
// Verify VOP*
- if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) {
+ if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI)) {
// Only look at the true operands. Only a real operand can use the constant
// bus, and we don't want to check pseudo-operands like the source modifier
// flags.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f6e5e8883f6..74b48c76180 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -814,6 +814,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getBasicFromSDWAOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
LLVM_READONLY
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0b8523a3f1e..a8686ec2999 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -20,6 +20,8 @@ def SIEncodingFamily {
int NONE = -1;
int SI = 0;
int VI = 1;
+ int SDWA = 2;
+ int SDWA9 = 3;
}
//===----------------------------------------------------------------------===//
@@ -452,25 +454,25 @@ def ExpSrc3 : RegisterOperand<VGPR_32> {
let ParserMatchClass = VReg32OrOffClass;
}
-class SDWA9Src : RegisterOperand<VS_32> {
+class SDWASrc : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_SDWA9_SRC";
- let EncoderMethod = "getSDWA9SrcEncoding";
+ let OperandType = "OPERAND_SDWA_SRC";
+ let EncoderMethod = "getSDWASrcEncoding";
}
-def SDWA9Src32 : SDWA9Src {
- let DecoderMethod = "decodeSDWA9Src32";
+def SDWASrc32 : SDWASrc {
+ let DecoderMethod = "decodeSDWASrc32";
}
-def SDWA9Src16 : SDWA9Src {
- let DecoderMethod = "decodeSDWA9Src16";
+def SDWASrc16 : SDWASrc {
+ let DecoderMethod = "decodeSDWASrc16";
}
-def SDWA9VopcDst : VOPDstOperand<SReg_64> {
+def SDWAVopcDst : VOPDstOperand<SReg_64> {
let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_SDWA9_VOPC_DST";
- let EncoderMethod = "getSDWA9VopcDstEncoding";
- let DecoderMethod = "decodeSDWA9VopcDst";
+ let OperandType = "OPERAND_SDWA_VOPC_DST";
+ let EncoderMethod = "getSDWAVopcDstEncoding";
+ let DecoderMethod = "decodeSDWAVopcDst";
}
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@@ -634,13 +636,13 @@ class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass>
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
-def FPRegInputModsMatchClass : AsmOperandClass {
- let Name = "RegWithFPInputMods";
+def FPRegSDWAInputModsMatchClass : AsmOperandClass {
+ let Name = "SDWARegWithFPInputMods";
let ParserMethod = "parseRegWithFPInputMods";
- let PredicateMethod = "isRegKind";
+ let PredicateMethod = "isSDWARegKind";
}
-def FPRegInputMods : InputMods <FPRegInputModsMatchClass> {
+def FPRegSDWAInputMods : InputMods <FPRegSDWAInputModsMatchClass> {
let PrintMethod = "printOperandAndFPInputMods";
}
@@ -655,13 +657,13 @@ def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
}
-def IntRegInputModsMatchClass : AsmOperandClass {
- let Name = "RegWithIntInputMods";
+def IntRegSDWAInputModsMatchClass : AsmOperandClass {
+ let Name = "SDWARegWithIntInputMods";
let ParserMethod = "parseRegWithIntInputMods";
- let PredicateMethod = "isRegKind";
+ let PredicateMethod = "isSDWARegKind";
}
-def IntRegInputMods : InputMods <IntRegInputModsMatchClass> {
+def IntRegSDWAInputMods : InputMods <IntRegSDWAInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
@@ -851,10 +853,10 @@ class getVALUDstForVT<ValueType VT> {
}
// Returns the register class to use for the destination of VOP[12C]
-// instructions with GFX9 SDWA extension
-class getSDWA9DstForVT<ValueType VT> {
+// instructions with SDWA extension
+class getSDWADstForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 1),
- SDWA9VopcDst, // VOPC
+ SDWAVopcDst, // VOPC
VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}
@@ -898,8 +900,8 @@ class getVregSrcForVT<ValueType VT> {
!if(!eq(VT.Size, 64), VReg_64, VGPR_32));
}
-class getSDWA9SrcForVT <ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32);
+class getSDWASrcForVT <ValueType VT> {
+ RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32);
}
// Returns the register class to use for sources of VOP3 instructions for the
@@ -995,7 +997,7 @@ class getSrcMod <ValueType VT> {
);
}
-// Return type of input modifiers operand specified input operand for SDWA/DPP
+// Return type of input modifiers operand specified input operand for DPP
class getSrcModExt <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
@@ -1004,13 +1006,13 @@ class getSrcModExt <ValueType VT> {
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
-// Return type of input modifiers operand specified input operand for SDWA 9
-class getSrcModSDWA9 <ValueType VT> {
+// Return type of input modifiers operand specified input operand for SDWA
+class getSrcModSDWA <ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
- Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods);
+ Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods);
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
@@ -1141,36 +1143,12 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
/* endif */)));
}
-class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod,
- ValueType DstVT> {
- dag ret = !if(!eq(NumSrcArgs, 0),
- // VOP1 without input operands (V_NOP)
- (ins),
- !if(!eq(NumSrcArgs, 1),
- // VOP1_SDWA
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel),
- !if(!eq(NumSrcArgs, 2),
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel)),
- (ins)/* endif */)));
-}
-// Ins for GFX9 SDWA
-class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
- bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
- ValueType DstVT> {
+// Ins for SDWA
+class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
+ bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
+ ValueType DstVT> {
dag ret = !if(!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
@@ -1178,31 +1156,31 @@ class getInsSDWA9 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArg
!if(!eq(NumSrcArgs, 1),
// VOP1
!if(!eq(HasSDWAOMod, 0),
- // VOP1_SDWA9 without omod
+ // VOP1_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel),
- // VOP1_SDWA9 with omod
+ // VOP1_SDWA with omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp, omod:$omod,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel)),
!if(!eq(NumSrcArgs, 2),
!if(!eq(DstVT.Size, 1),
- // VOPC_SDWA9
+ // VOPC_SDWA
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA9
+ clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA
!if(!eq(HasSDWAOMod, 0),
- // VOP2_SDWA9 without omod
+ // VOP2_SDWA without omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP1_SDWA9 with omod
+ // VOP2_SDWA with omod
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, omod:$omod,
@@ -1220,12 +1198,12 @@ class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
(outs)); // V_NOP
}
-// Outs for GFX9 SDWA
-class getOutsSDWA9 <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA9> {
+// Outs for SDWA
+class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
- (outs DstRCSDWA9:$sdst),
- (outs DstRCSDWA9:$vdst)),
+ (outs DstRCSDWA:$sdst),
+ (outs DstRCSDWA:$vdst)),
(outs)); // V_NOP
}
@@ -1387,8 +1365,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
- field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
- field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT<DstVT>.ret;
+ field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -1396,19 +1373,15 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
- field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
- field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
- field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
- field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT<Src0VT>.ret;
+ field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
field Operand Src0Mod = getSrcMod<Src0VT>.ret;
field Operand Src1Mod = getSrcMod<Src1VT>.ret;
field Operand Src2Mod = getSrcMod<Src2VT>.ret;
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
- field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret;
- field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret;
- field Operand Src0ModSDWA9 = getSrcModSDWA9<Src0VT>.ret;
- field Operand Src1ModSDWA9 = getSrcModSDWA9<Src1VT>.ret;
+ field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
+ field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
@@ -1457,8 +1430,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Outs32 = Outs;
field dag Outs64 = Outs;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
- field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret;
- field dag OutsSDWA9 = getOutsSDWA9<HasDst, DstVT, DstRCSDWA9>.ret;
+ field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
@@ -1471,11 +1443,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
- HasModifiers, Src0ModSDWA, Src1ModSDWA,
+ HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
- field dag InsSDWA9 = getInsSDWA9<Src0SDWA9, Src1SDWA9, NumSrcArgs,
- HasSDWAOMod, Src0ModSDWA9, Src1ModSDWA9,
- DstVT>.ret;
+
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
@@ -1628,13 +1598,13 @@ def getSDWAOp : InstrMapping {
let ValueCols = [["SDWA"]];
}
-// Maps ordinary instructions to their SDWA GFX9 counterparts
-def getSDWA9Op : InstrMapping {
+// Maps SDWA instructions to their ordinary counterparts
+def getBasicFromSDWAOp : InstrMapping {
let FilterClass = "VOP";
let RowFields = ["OpName"];
let ColFields = ["AsmVariantName"];
- let KeyCol = ["Default"];
- let ValueCols = [["SDWA9"]];
+ let KeyCol = ["SDWA"];
+ let ValueCols = [["Default"]];
}
def getMaskedMIMGOp : InstrMapping {
@@ -1669,7 +1639,9 @@ def getMCOpcodeGen : InstrMapping {
let ColFields = ["Subtarget"];
let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
- [!cast<string>(SIEncodingFamily.VI)]];
+ [!cast<string>(SIEncodingFamily.VI)],
+ [!cast<string>(SIEncodingFamily.SDWA)],
+ [!cast<string>(SIEncodingFamily.SDWA9)]];
}
// Get equivalent SOPK instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index f4ddf189168..e756c86e35d 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -224,7 +224,7 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
static bool isSubregOf(const MachineOperand &SubReg,
const MachineOperand &SuperReg,
const TargetRegisterInfo *TRI) {
-
+
if (!SuperReg.isReg() || !SubReg.isReg())
return false;
@@ -557,7 +557,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src0->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -590,7 +590,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
break;
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src1->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -613,9 +613,17 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
if (AMDGPU::getSDWAOp(Opc) != -1)
return true;
int Opc32 = AMDGPU::getVOPe32(Opc);
- if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) {
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return false;
+
+ if (TII->isVOPC(Opc)) {
+ const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ return SDst && SDst->getReg() == AMDGPU::VCC;
+ } else {
+ return !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ }
+ }
return false;
}
@@ -641,6 +649,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ assert(Dst &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.add(*Dst);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -678,8 +691,12 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}
// Initialize clamp.
- assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
- SDWAInst.addImm(0);
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1)
+ SDWAInst.addImm(0);
+
+ // Initialize omod.
+ if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1)
+ SDWAInst.addImm(0);
// Initialize dst_sel and dst_unused if present
if (Dst) {
@@ -766,7 +783,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
-
+
// Find all SDWA operands in MF.
matchSDWAOperands(MF);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95b5ef0a49d..96b33c373f0 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -93,11 +93,6 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
-class VOP1_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOP1";
-}
-
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -117,7 +112,6 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
- def _sdwa9 : VOP1_SDWA9_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -274,12 +268,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel);
- let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 1>.ret;
@@ -545,8 +537,8 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
// For now left dpp only for asm/dasm
// TODO: add corresponding pseudo
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 657cacaa792..94f4274c31d 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -114,11 +114,6 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
-class VOP2_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOP2";
-}
-
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -139,7 +134,6 @@ multiclass VOP2Inst <string opName,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
- def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P>;
}
multiclass VOP2bInst <string opName,
@@ -156,10 +150,6 @@ multiclass VOP2bInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
-
- def _sdwa9 : VOP2_SDWA9_Pseudo <opName, P> {
- let AsmMatchConverter = "cvtSdwaVOP2b";
- }
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -221,17 +211,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
VGPR_32:$src2, // stub argument
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- VGPR_32:$src2, // stub argument
- clampmod:$clamp, omod:$omod,
- dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
@@ -289,15 +275,10 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ clampmod:$clamp, omod:$omod,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- clampmod:$clamp, omod:$omod,
- dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
-
let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
Src1Mod:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
@@ -728,8 +709,8 @@ multiclass VOP2_SDWA_Real <bits<6> op> {
multiclass VOP2_SDWA9_Real <bits<6> op> {
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index cd347b86d30..f3482a22d5d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -113,11 +113,6 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOPC";
}
-class VOPC_SDWA9_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- VOP_SDWA9_Pseudo <OpName, P, pattern> {
- let AsmMatchConverter = "cvtSdwaVOPC";
-}
-
// This class is used only with VOPC instructions. Use $sdst for out operand
class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
@@ -189,13 +184,6 @@ multiclass VOPC_Pseudos <string opName,
let isConvergent = DefExec;
let isCompare = 1;
}
-
- def _sdwa9 : VOPC_SDWA9_Pseudo <opName, P> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let SchedRW = P.Schedule;
- let isConvergent = DefExec;
- let isCompare = 1;
- }
}
def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
@@ -540,14 +528,12 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
VOPC_Profile<sched, vt, i32> {
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$sdst, $src0_modifiers, $src1";
+
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0,
- Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
+
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
- //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
@@ -580,12 +566,6 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
-
- def _sdwa9 : VOPC_SDWA9_Pseudo <opName, p> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let SchedRW = p.Schedule;
- let isConvergent = DefExec;
- }
}
def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
@@ -954,8 +934,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9")>,
- VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA9_Pseudo>(NAME#"_sdwa9").Pfl>;
+ VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_vi")> {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 4da654f84f9..f5df1a82e32 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -300,6 +300,19 @@ class VOP_SDWAe<VOPProfile P> : Enc64 {
let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
}
+// GFX9 adds two features to SDWA:
+// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
+// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
+// than VGPRs (at most 1 can be an SGPR);
+// b. OMOD is the standard output modifier (result *2, *4, /2)
+// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
+// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
+// fields.
+// a. When SD=1, the SDST is used as the destination for the compare result;
+// b. When SD=0, VCC is used.
+//
+// In GFX9, the V_MAC_F16 and V_MAC_F32 opcodes cannot be used with SDWA.
+
// gfx9 SDWA basic encoding
class VOP_SDWA9e<VOPProfile P> : Enc64 {
bits<9> src0; // {src0_sgpr{0}, src0{7-0}}
@@ -353,6 +366,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
string Mnemonic = opName;
string AsmOperands = P.AsmSDWA;
+ string AsmOperands9 = P.AsmSDWA9;
let Size = 8;
let mayLoad = 0;
@@ -372,53 +386,9 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOPProfile Pfl = P;
}
-// GFX9 adds two features to SDWA:
-// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD.
-// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather
-// than VGPRs (at most 1 can be an SGPR);
-// b. OMOD is the standard output modifier (result *2, *4, /2)
-// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This
-// replaces OMOD and the dest fields with SD and SDST (SGPR destination)
-// field.
-// a. When SD=1, the SDST is used as the destination for the compare result;
-// b.when SD=0, VCC is used.
-//
-// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA
-
-class VOP_SDWA9_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
- InstSI <P.OutsSDWA9, P.InsSDWA9, "", pattern>,
- VOP <opName>,
- SIMCInstr <opName#"_sdwa9", SIEncodingFamily.NONE>,
- MnemonicAlias <opName#"_sdwa9", opName> {
-
- let isPseudo = 1;
- let isCodeGenOnly = 1;
- let UseNamedOperandTable = 1;
-
- string Mnemonic = opName;
- string AsmOperands = P.AsmSDWA9;
-
- let Size = 8;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-
- let VALU = 1;
- let SDWA = 1;
- let Uses = [EXEC];
-
- let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst);
- let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA9";
-
- VOPProfile Pfl = P;
-}
-
class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -431,6 +401,10 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
+ // string Mnemonic = ps.Mnemonic;
+ // string AsmOperands = ps.AsmOperands;
+ // string AsmOperands9 = ps.AsmOperands9;
+
// Copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AssemblerPredicate = ps.AssemblerPredicate;
@@ -443,9 +417,9 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
let TSFlags = ps.TSFlags;
}
-class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -458,13 +432,15 @@ class VOP_SDWA9_Real <VOP_SDWA9_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
+ let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "SDWA9";
+
// Copy relevant pseudo op flags
- let SubtargetPredicate = ps.SubtargetPredicate;
- let AssemblerPredicate = ps.AssemblerPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
- let AsmVariantName = ps.AsmVariantName;
let UseNamedOperandTable = ps.UseNamedOperandTable;
- let DecoderNamespace = ps.DecoderNamespace;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
OpenPOWER on IntegriCloud