summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 112
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp 47
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h 5
-rw-r--r-- llvm/lib/Target/AMDGPU/SIDefines.h 23
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrFormats.td 24
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.td 198
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 8
-rw-r--r-- llvm/lib/Target/AMDGPU/VIInstrFormats.td 55
8 files changed, 414 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c099c9b3a36..500a3aa40e8 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -74,6 +74,8 @@ public:
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
+ ImmTySdwaSel,
+ ImmTySdwaDstUnused,
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
@@ -253,6 +255,14 @@ public:
return isImmTy(ImmTyDppBoundCtrl);
}
+  /// True when this operand is an immediate tagged as ImmTySdwaSel.
+  bool isSDWASel() const { return isImmTy(ImmTySdwaSel); }
+
+  /// True when this operand is an immediate tagged as ImmTySdwaDstUnused.
+  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
+
void setModifiers(unsigned Mods) {
assert(isReg() || (isImm() && Imm.Modifiers == 0));
if (isReg())
@@ -522,6 +532,7 @@ public:
OperandMatchResultTy parseOptionalOps(
const ArrayRef<OptionalOperand> &OptionalOps,
OperandVector &Operands);
+ OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
@@ -569,6 +580,9 @@ public:
void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands);
void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands);
void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods);
+
+ OperandMatchResultTy parseSDWASel(OperandVector &Operands);
+ OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
};
struct OptionalOperand {
@@ -1396,6 +1410,30 @@ AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
return MatchOperand_NoMatch;
}
+/// Parses the token sequence `<Prefix> ':' <identifier>`.
+///
+/// Returns MatchOperand_NoMatch without consuming anything when the current
+/// token is not the identifier \p Prefix. After the prefix has been consumed,
+/// a missing ':' or a non-identifier following it gives MatchOperand_ParseFail.
+/// On success \p Value receives the identifier's text; that identifier is
+/// still the current token, so the caller is expected to Lex() past it.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseStringWithPrefix(const char *Prefix, StringRef &Value) {
+  if (getLexer().isNot(AsmToken::Identifier) ||
+      Parser.getTok().getString() != Prefix)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // step over the prefix
+  if (getLexer().isNot(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+
+  Parser.Lex(); // step over ':'
+  if (getLexer().isNot(AsmToken::Identifier))
+    return MatchOperand_ParseFail;
+
+  Value = Parser.getTok().getString();
+  return MatchOperand_Success;
+}
+
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
@@ -2296,6 +2334,80 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
+//===----------------------------------------------------------------------===//
+// sdwa
+//===----------------------------------------------------------------------===//
+
+/// Parses one SDWA data-select operand of the form `dst_sel:<name>`,
+/// `src0_sel:<name>` or `src1_sel:<name>` and appends it to \p Operands as an
+/// ImmTySdwaSel immediate.
+///
+/// Returns MatchOperand_NoMatch when none of the three prefixes is present,
+/// and MatchOperand_ParseFail when the prefix is malformed or the selector
+/// name is not one of BYTE_0..BYTE_3, WORD_0, WORD_1, DWORD.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSDWASel(OperandVector &Operands) {
+  static const char *const Prefixes[] = {"dst_sel", "src0_sel", "src1_sel"};
+
+  const SMLoc StartLoc = Parser.getTok().getLoc();
+  StringRef SelName;
+
+  // Try each prefix in turn; anything other than NoMatch (success or a hard
+  // parse failure) ends the search.
+  AMDGPUAsmParser::OperandMatchResultTy Status = MatchOperand_NoMatch;
+  for (const char *Prefix : Prefixes) {
+    Status = parseStringWithPrefix(Prefix, SelName);
+    if (Status != MatchOperand_NoMatch)
+      break;
+  }
+  if (Status != MatchOperand_Success)
+    return Status;
+
+  // 0xffffffff marks an unrecognised selector name.
+  const int64_t Encoding = StringSwitch<int64_t>(SelName)
+                               .Case("BYTE_0", 0)
+                               .Case("BYTE_1", 1)
+                               .Case("BYTE_2", 2)
+                               .Case("BYTE_3", 3)
+                               .Case("WORD_0", 4)
+                               .Case("WORD_1", 5)
+                               .Case("DWORD", 6)
+                               .Default(0xffffffff);
+  Parser.Lex(); // eat last token
+
+  if (Encoding == 0xffffffff)
+    return MatchOperand_ParseFail;
+
+  Operands.push_back(AMDGPUOperand::CreateImm(Encoding, StartLoc,
+                                              AMDGPUOperand::ImmTySdwaSel));
+  return MatchOperand_Success;
+}
+
+/// Parses `dst_unused:<mode>` and appends the mode to \p Operands as an
+/// ImmTySdwaDstUnused immediate.
+///
+/// Accepted modes are UNUSED_PAD (0), UNUSED_SEXT (1) and UNUSED_PRESERVE (2);
+/// any other name yields MatchOperand_ParseFail. The result of
+/// parseStringWithPrefix is passed through unchanged when it is not a success.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+  const SMLoc StartLoc = Parser.getTok().getLoc();
+  StringRef ModeName;
+
+  const AMDGPUAsmParser::OperandMatchResultTy Status =
+      parseStringWithPrefix("dst_unused", ModeName);
+  if (Status != MatchOperand_Success)
+    return Status;
+
+  // 0xffffffff marks an unrecognised mode name.
+  const int64_t Encoding = StringSwitch<int64_t>(ModeName)
+                               .Case("UNUSED_PAD", 0)
+                               .Case("UNUSED_SEXT", 1)
+                               .Case("UNUSED_PRESERVE", 2)
+                               .Default(0xffffffff);
+  Parser.Lex(); // eat last token
+
+  if (Encoding == 0xffffffff)
+    return MatchOperand_ParseFail;
+
+  Operands.push_back(AMDGPUOperand::CreateImm(
+      Encoding, StartLoc, AMDGPUOperand::ImmTySdwaDstUnused));
+  return MatchOperand_Success;
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index e02bc90e89c..8ac3caf0081 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -282,6 +282,8 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
O << "_e64 ";
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
O << "_dpp ";
+ else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
+ O << "_sdwa ";
else
O << "_e32 ";
@@ -479,6 +481,51 @@ void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo,
}
}
+/// Prints the symbolic name of the SDWA data-select immediate at operand
+/// \p OpNo (0..6 map to BYTE_0..BYTE_3, WORD_0, WORD_1, DWORD). Any other
+/// value is treated as unreachable.
+void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O) {
+  static const char *const SelNames[] = {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3",
+                                         "WORD_0", "WORD_1", "DWORD"};
+
+  const unsigned Sel = MI->getOperand(OpNo).getImm();
+  if (Sel >= sizeof(SelNames) / sizeof(SelNames[0]))
+    llvm_unreachable("Invalid SDWA data select operand");
+  O << SelNames[Sel];
+}
+
+/// Prints "dst_sel:" followed by the selector name written by printSDWASel.
+void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "dst_sel:";
+ printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "src0_sel:" followed by the selector name written by printSDWASel.
+void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "src0_sel:";
+ printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "src1_sel:" followed by the selector name written by printSDWASel.
+void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "src1_sel:";
+ printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "dst_unused:" followed by the symbolic name of the immediate at
+/// operand \p OpNo (0 = UNUSED_PAD, 1 = UNUSED_SEXT, 2 = UNUSED_PRESERVE).
+/// Any other value is treated as unreachable.
+void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  static const char *const ModeNames[] = {"UNUSED_PAD", "UNUSED_SEXT",
+                                          "UNUSED_PRESERVE"};
+
+  O << "dst_unused:";
+  const unsigned Mode = MI->getOperand(OpNo).getImm();
+  if (Mode >= sizeof(ModeNames) / sizeof(ModeNames[0]))
+    llvm_unreachable("Invalid SDWA dest_unused operand");
+  O << ModeNames[Mode];
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 64618c737e7..97c9c7614a7 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -67,6 +67,11 @@ private:
void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 334183d8a42..50f3a50ff6c 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -29,17 +29,18 @@ enum {
VOP2 = 1 << 11,
VOP3 = 1 << 12,
VOPC = 1 << 13,
- DPP = 1 << 14,
-
- MUBUF = 1 << 15,
- MTBUF = 1 << 16,
- SMRD = 1 << 17,
- DS = 1 << 18,
- MIMG = 1 << 19,
- FLAT = 1 << 20,
- WQM = 1 << 21,
- VGPRSpill = 1 << 22,
- VOPAsmPrefer32Bit = 1 << 23
+ SDWA = 1 << 14,
+ DPP = 1 << 15,
+
+ MUBUF = 1 << 16,
+ MTBUF = 1 << 17,
+ SMRD = 1 << 18,
+ DS = 1 << 19,
+ MIMG = 1 << 20,
+ FLAT = 1 << 21,
+ WQM = 1 << 22,
+ VGPRSpill = 1 << 23,
+ VOPAsmPrefer32Bit = 1 << 24
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 1f0c2e780a5..df5c81bb573 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VOP2 = 0;
field bits<1> VOP3 = 0;
field bits<1> VOPC = 0;
+ field bits<1> SDWA = 0;
field bits<1> DPP = 0;
field bits<1> MUBUF = 0;
@@ -64,17 +65,18 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{11} = VOP2;
let TSFlags{12} = VOP3;
let TSFlags{13} = VOPC;
- let TSFlags{14} = DPP;
-
- let TSFlags{15} = MUBUF;
- let TSFlags{16} = MTBUF;
- let TSFlags{17} = SMRD;
- let TSFlags{18} = DS;
- let TSFlags{19} = MIMG;
- let TSFlags{20} = FLAT;
- let TSFlags{21} = WQM;
- let TSFlags{22} = VGPRSpill;
- let TSFlags{23} = VOPAsmPrefer32Bit;
+ let TSFlags{14} = SDWA;
+ let TSFlags{15} = DPP;
+
+ let TSFlags{16} = MUBUF;
+ let TSFlags{17} = MTBUF;
+ let TSFlags{18} = SMRD;
+ let TSFlags{19} = DS;
+ let TSFlags{20} = MIMG;
+ let TSFlags{21} = FLAT;
+ let TSFlags{22} = WQM;
+ let TSFlags{23} = VGPRSpill;
+ let TSFlags{24} = VOPAsmPrefer32Bit;
let SchedRW = [Write32Bit];
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 79ef1c66218..105ec8a5ecd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -576,6 +576,22 @@ class DPPOptionalMatchClass <string OpName>: AsmOperandClass {
let IsOptional = 1;
}
+// Optional asm operand class for SDWA data-select operands: recognised with
+// isSDWASel, parsed by parseSDWASel and rendered through addImmOperands.
+def SDWASelMatchClass : AsmOperandClass {
+ let Name = "SDWASel";
+ let PredicateMethod = "isSDWASel";
+ let ParserMethod = "parseSDWASel";
+ let RenderMethod = "addImmOperands";
+ let IsOptional = 1;
+}
+
+// Optional asm operand class for the SDWA dst_unused operand: recognised with
+// isSDWADstUnused, parsed by parseSDWADstUnused and rendered through
+// addImmOperands.
+def SDWADstUnusedMatchClass : AsmOperandClass {
+ let Name = "SDWADstUnused";
+ let PredicateMethod = "isSDWADstUnused";
+ let ParserMethod = "parseSDWADstUnused";
+ let RenderMethod = "addImmOperands";
+ let IsOptional = 1;
+}
+
class OptionalImmAsmOperand <string OpName> : AsmOperandClass {
let Name = "Imm"#OpName;
let PredicateMethod = "isImm";
@@ -737,11 +753,31 @@ def bound_ctrl : Operand <i1> {
let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">;
}
+// SDWA destination select operand; printed by printSDWADstSel.
+def dst_sel : Operand <i32> {
+ let PrintMethod = "printSDWADstSel";
+ let ParserMatchClass = SDWASelMatchClass;
+}
+
+// SDWA source 0 select operand; printed by printSDWASrc0Sel.
+def src0_sel : Operand <i32> {
+ let PrintMethod = "printSDWASrc0Sel";
+ let ParserMatchClass = SDWASelMatchClass;
+}
+
+// SDWA source 1 select operand; printed by printSDWASrc1Sel.
+def src1_sel : Operand <i32> {
+ let PrintMethod = "printSDWASrc1Sel";
+ let ParserMatchClass = SDWASelMatchClass;
+}
+
def hwreg : Operand <i16> {
let PrintMethod = "printHwreg";
let ParserMatchClass = HwregMatchClass;
}
+// SDWA dst_unused operand; printed by printSDWADstUnused.
+def dst_unused : Operand <i32> {
+ let PrintMethod = "printSDWADstUnused";
+ let ParserMatchClass = SDWADstUnusedMatchClass;
+}
+
} // End OperandType = "OPERAND_IMMEDIATE"
@@ -1316,16 +1352,11 @@ class getVOPSrc0ForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
}
-// Returns the register class to use for source 1 of VOP[12C] for the
-// given VT.
-class getVOPSrc1ForVT<ValueType VT> {
+// Returns the vector register class for a source operand of the given VT: VReg_64 when VT.Size is 64, VGPR_32 otherwise.
+class getVregSrcForVT<ValueType VT> {
RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
}
-// Returns the register class to use for DPP source operands.
-class getDPPSrcForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
-}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
@@ -1431,7 +1462,40 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
/* endif */)));
}
-class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
+// Builds the input operand dag for the SDWA form of a VOP1/VOP2 instruction,
+// selected by the number of source arguments and whether the instruction
+// takes source modifiers (which also adds the clamp operand).
+class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
+ bit HasModifiers> {
+
+ dag ret = !if (!eq(NumSrcArgs, 0),
+ // VOP1 without input operands (V_NOP)
+ (ins),
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // VOP1_SDWA with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel)
+ /* else */,
+ // VOP1_SDWA without modifiers
+ (ins Src0RC:$src0, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel)
+ /* endif */)
+ /* NumSrcArgs == 2 */,
+ !if (!eq(HasModifiers, 1),
+ // VOP2_SDWA with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel)
+ /* else */,
+ // VOP2_SDWA without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1,
+ dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel)
+ /* endif */)));
+}
+
+// Outs for DPP and SDWA
+class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
dag ret = !if(HasDst,
!if(!eq(DstVT.Size, 1),
(outs DstRCDPP:$sdst), // sdst for VOPC
@@ -1484,20 +1548,41 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl";
}
-class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+// Builds the asm operand string for the SDWA form of an instruction: the
+// destination, then the sources (with modifiers and $clamp when HasModifiers
+// is set), then the SDWA select operands that apply for NumSrcArgs.
+class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst", // use $sdst for VOPC
+ "$vdst"),
+ "");
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string args = !if(!eq(HasModifiers, 0),
+ getAsm32<0, NumSrcArgs, DstVT>.ret,
+ ", "#src0#src1#", $clamp");
+ string sdwa = !if(!eq(NumSrcArgs, 0),
+ "",
+ !if(!eq(NumSrcArgs, 1),
+ " $dst_sel $dst_unused $src0_sel",
+ " $dst_sel $dst_unused $src0_sel $src1_sel"
+ )
+ );
+ string ret = dst#args#sdwa;
+}
+
+// Function that checks if instruction supports DPP and SDWA
+class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
bit ret = !if(!eq(NumSrcArgs, 3),
- 0, // NumSrcArgs == 3 - No DPP for VOP3
- !if(!eq(DstVT.Size, 1),
- 0, // No DPP for VOPC
- !if(!eq(DstVT.Size, 64),
- 0, // 64-bit dst - No DPP for 64-bit operands
+ 0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
+ !if(!eq(DstVT.Size, 64),
+ 0, // 64-bit dst - No DPP or SDWA for 64-bit operands
+ !if(!eq(Src0VT.Size, 64),
+ 0, // 64-bit src0
!if(!eq(Src0VT.Size, 64),
- 0, // 64-bit src0
- !if(!eq(Src0VT.Size, 64),
- 0, // 64-bit src2
- 1
- )
+ 0, // 64-bit src2
+ 1
)
)
)
@@ -1514,41 +1599,47 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRCSDWA = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
- field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+ field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
- field RegisterClass Src0DPP = getDPPSrcForVT<Src0VT>.ret;
- field RegisterClass Src1DPP = getDPPSrcForVT<Src1VT>.ret;
-
+ field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
+ field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
+ field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
+ field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
+
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
field bit HasModifiers = hasModifiers<Src0VT>.ret;
- field bit HasDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
-
+ field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
// output. This is manually overridden for them.
field dag Outs32 = Outs;
field dag Outs64 = Outs;
- field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+ // The SDWA outs are built from DstRCSDWA (not DstRCDPP) so that a profile
+ // overriding DstRCSDWA actually changes the SDWA destination operand.
+ field dag OutsSDWA = getOutsExt<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
+ field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
-class VOP_NO_DPP <VOPProfile p> : VOPProfile <p.ArgVT> {
- let HasDPP = 0;
+// Profile with the same argument types as p but with HasExt forced to 0.
+class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
+ let HasExt = 0;
}
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@@ -1659,12 +1750,12 @@ def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
field string Asm32 = "$vdst, $src0, $src1, $imm";
- field bit HasDPP = 0;
+ field bit HasExt = 0; // with HasExt = 0 the VOP*_DPP / VOP*_SDWA classes pick [DisableInst]
}
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
field string Asm32 = "$vdst, $src0, $imm, $src1";
- field bit HasDPP = 0;
+ field bit HasExt = 0; // with HasExt = 0 the VOP*_DPP / VOP*_SDWA classes pick [DisableInst]
}
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
@@ -1675,9 +1766,15 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC32:$src1,
+ VGPR_32:$src2, // stub argument
+ ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, f32>.ret;
let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
+ let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -1787,13 +1884,37 @@ multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
VOP1_DPPe <op.VI>,
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
+ let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); // HasExt now gates both DPP and SDWA forms
let DecoderNamespace = "DPP";
let DisableDecoder = DisableVIDecoder;
let src0_modifiers = !if(p.HasModifiers, ?, 0);
let src1_modifiers = 0;
}
+// Gives fixed values to the SDWA encoding fields that profile p does not use;
+// `?` leaves a field unset here. The selects fall back to 6 (printed as DWORD
+// by printSDWASel) and dst_unused to 0 (printed as UNUSED_PAD).
+class SDWADisableFields <VOPProfile p> {
+ bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
+ bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
+ bits<3> src0_modifiers = !if(p.HasModifiers, ?, 0);
+ bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
+ !if(!eq(p.NumSrcArgs, 1), 6,
+ ?));
+ bits<3> src1_modifiers = !if(!eq(p.NumSrcArgs, 0), 0,
+ !if(!eq(p.NumSrcArgs, 1), 0,
+ !if(p.HasModifiers, ?, 0)));
+ bits<3> dst_sel = !if(p.HasDst, ?, 6);
+ bits<2> dst_unused = !if(p.HasDst, ?, 0);
+ bits<1> clamp = !if(p.HasModifiers, ?, 0);
+}
+
+// SDWA form of a VOP1 instruction. Uses the [isVI] assembler predicate when
+// the profile has HasExt set and [DisableInst] otherwise.
+class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
+ VOP1_SDWAe <op.VI>,
+ VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
+ SDWADisableFields <p> {
+ let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
+ let DecoderNamespace = "SDWA";
+ let DisableDecoder = DisableVIDecoder;
+}
+
multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
string asm = opName#p.Asm32> {
@@ -1851,13 +1972,22 @@ multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
VOP2_DPPe <op.VI>,
VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasDPP, [isVI], [DisableInst]);
+ let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); // HasExt now gates both DPP and SDWA forms
let DecoderNamespace = "DPP";
let DisableDecoder = DisableVIDecoder;
let src0_modifiers = !if(p.HasModifiers, ?, 0);
let src1_modifiers = !if(p.HasModifiers, ?, 0);
}
+// SDWA form of a VOP2 instruction. Uses the [isVI] assembler predicate when
+// the profile has HasExt set and [DisableInst] otherwise.
+class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
+ VOP2_SDWAe <op.VI>,
+ VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
+ SDWADisableFields <p> {
+ let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
+ let DecoderNamespace = "SDWA";
+ let DisableDecoder = DisableVIDecoder;
+}
+
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
@@ -2089,6 +2219,8 @@ multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
p.HasModifiers>;
def _dpp : VOP1_DPP <op, opName, p>;
+
+ def _sdwa : VOP1_SDWA <op, opName, p>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -2122,6 +2254,8 @@ multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
revOp, p.HasModifiers>;
def _dpp : VOP2_DPP <op, opName, p>;
+
+ def _sdwa : VOP2_SDWA <op, opName, p>;
}
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 64d42b76d44..6763d8b0c1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1394,13 +1394,13 @@ defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32, int_amdgcn_frexp_mant
>;
let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_DPP<VOP_NONE>>;
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>; // VOP_NO_EXT: profile with HasExt = 0
}
let Uses = [M0, EXEC] in {
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_DPP<VOP_I32_I32>>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_DPP<VOP_I32_I32>>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_DPP<VOP_I32_I32>>;
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_EXT<VOP_I32_I32>>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_EXT<VOP_I32_I32>>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
// These instruction only exist on SI and CI
diff --git a/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
index d801a3d120e..a391cec8b22 100644
--- a/llvm/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
@@ -225,6 +225,61 @@ class VOP2_DPPe <bits<6> op> : VOP_DPPe {
let Inst{31} = 0x0; //encoding
}
+// Common base for SDWA instruction definitions: sets the SDWA flag and an
+// 8-byte size. NOTE(review): HasMods is accepted but not referenced in this
+// class body.
+class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+ let SDWA = 1;
+ let Size = 8;
+}
+
+// Encoding of the SDWA-specific fields, placed in Inst{63-32} of the 64-bit
+// instruction word. Bits 46-47, 54-55 and 62-63 are not assigned here.
+class VOP_SDWAe : Enc64 {
+ bits<8> src0;
+ bits<3> src0_sel;
+ bits<3> src0_modifiers; // {abs,neg,sext}
+ bits<3> src1_sel;
+ bits<3> src1_modifiers;
+ bits<3> dst_sel;
+ bits<2> dst_unused;
+ bits<1> clamp;
+
+ let Inst{39-32} = src0;
+ let Inst{42-40} = dst_sel;
+ let Inst{44-43} = dst_unused;
+ let Inst{45} = clamp;
+ let Inst{50-48} = src0_sel;
+ let Inst{53-51} = src0_modifiers;
+ let Inst{58-56} = src1_sel;
+ let Inst{61-59} = src1_modifiers;
+}
+
+// Low 32 bits of a VOP1 SDWA instruction: Inst{8-0} holds the fixed value
+// 0xf9 marking SDWA, followed by the opcode, vdst and the 0x3f encoding bits.
+class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
+ bits<8> vdst;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = op;
+ let Inst{24-17} = vdst;
+ let Inst{31-25} = 0x3f; // encoding
+}
+
+// Low 32 bits of a VOP2 SDWA instruction: Inst{8-0} holds the fixed value
+// 0xf9 marking SDWA, followed by src1, vdst, the opcode and a zero top bit.
+class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
+ bits<8> vdst;
+ bits<8> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = src1;
+ let Inst{24-17} = vdst;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+}
+
+// Low 32 bits of a VOPC SDWA instruction: Inst{8-0} holds the fixed value
+// 0xf9 marking SDWA, followed by src1, the opcode and the 0x3e encoding bits.
+// NOTE(review): no user of this class is visible in this diff.
+class VOPC_SDWAe <bits<8> op> : VOP_SDWAe {
+ bits<8> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = src1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e; // encoding
+}
+
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}
OpenPOWER on IntegriCloud