summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td411
-rw-r--r--llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp126
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td45
8 files changed, 535 insertions, 121 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 061edf8fbc3..96f819fd0e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3664,6 +3664,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
+ NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index ed9721970dd..a45234e2b39 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -403,6 +403,8 @@ enum NodeType : unsigned {
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
+ TBUFFER_STORE_FORMAT_X3,
+ TBUFFER_LOAD_FORMAT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 0dc939bd795..7b8756050b7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -152,6 +152,8 @@ public:
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
+ ImmTyDFMT,
+ ImmTyNFMT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
@@ -294,6 +296,8 @@ public:
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
+ bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
+ bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -638,6 +642,8 @@ public:
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
+ case ImmTyDFMT: OS << "DFMT"; break;
+ case ImmTyNFMT: OS << "NFMT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -1033,6 +1039,8 @@ public:
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
+ void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
+
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultTFE() const;
@@ -3820,6 +3828,44 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
+void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle the case where soffset is an immediate
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ continue;
+ }
+ assert(Op.isImm());
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOffset);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+}
+
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//
@@ -4000,6 +4046,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
{"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
+ {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
+ {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 2aca65ac843..2e96c14eaa3 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -57,6 +57,11 @@ class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
string OpName = NAME # suffix;
}
+class MTBUFAddr64Table <bit is_addr64, string suffix = ""> {
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
//===----------------------------------------------------------------------===//
// MTBUF classes
//===----------------------------------------------------------------------===//
@@ -78,14 +83,31 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
let EXP_CNT = 1;
let MTBUF = 1;
let Uses = [EXEC];
-
let hasSideEffects = 0;
let SchedRW = [WriteVMEM];
+
+ let AsmMatchConverter = "cvtMtbuf";
+
+ bits<1> offen = 0;
+ bits<1> idxen = 0;
+ bits<1> addr64 = 0;
+ bits<1> has_vdata = 1;
+ bits<1> has_vaddr = 1;
+ bits<1> has_glc = 1;
+ bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<4> dfmt_value = 1; // the value for dfmt if no such operand
+ bits<3> nfmt_value = 0; // the value for nfmt if no such operand
+ bits<1> has_srsrc = 1;
+ bits<1> has_soffset = 1;
+ bits<1> has_offset = 1;
+ bits<1> has_slc = 1;
+ bits<1> has_tfe = 1;
+ bits<1> has_dfmt = 1;
+ bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
- Enc64 {
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -97,57 +119,168 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
- bits<8> vdata;
bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
+ bits<1> glc;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+}
+
+class getMTBUFInsDA<list<RegisterClass> vdataList,
+ list<RegisterClass> vaddrList=[]> {
+ RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
+ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
+ dag InsNoData = !if(!empty(vaddrList),
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe),
+ (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, slc:$slc, tfe:$tfe)
+ );
+ dag InsData = !if(!empty(vaddrList),
+ (ins vdataClass:$vdata, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ slc:$slc, tfe:$tfe)
+ );
+ dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
-class MTBUF_Load_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
- opName, (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
- " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
- " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
+ dag ret =
+ !if(!eq(addrKind, BUFAddrKind.Offset), getMTBUFInsDA<vdataList>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), getMTBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64), getMTBUFInsDA<vdataList, [VReg_64]>.ret,
+ (ins))))));
+}
+
+class getMTBUFAsmOps<int addrKind> {
+ string Pfx =
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.OffEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ !if(!eq(addrKind, BUFAddrKind.IdxEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ !if(!eq(addrKind, BUFAddrKind.BothEn),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ !if(!eq(addrKind, BUFAddrKind.Addr64),
+ "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "")))));
+ string ret = Pfx # "$offset";
+}
+
+class MTBUF_SetupAddr<int addrKind> {
+ bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+
+ bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+
+ bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
+
+ bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
+}
+
+class MTBUF_Load_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind>
+ : MTBUF_Pseudo<opName,
+ (outs vdataClass:$vdata),
+ getMTBUFIns<addrKindCopy>.ret,
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MTBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 1;
let mayStore = 0;
}
-class MTBUF_Store_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
- opName, (outs),
- (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
- SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
- " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
- " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
+
+ def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
+ i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MTBUFAddr64Table<0>;
+
+ def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
+ i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MTBUFAddr64Table<1>;
+
+ def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
+
+class MTBUF_Store_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind,
+ RegisterClass vdataClassCopy = vdataClass>
+ : MTBUF_Pseudo<opName,
+ (outs),
+ getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MTBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let mayLoad = 0;
let mayStore = 1;
}
+multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
+ ValueType store_vt = i32,
+ SDPatternOperator st = null_frag> {
+
+ def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i1:$slc, i1:$tfe))]>,
+ MTBUFAddr64Table<0>;
+
+ def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i1:$slc, i1:$tfe))]>,
+ MTBUFAddr64Table<1>;
+
+ def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// MUBUF classes
//===----------------------------------------------------------------------===//
@@ -676,14 +809,14 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>;
-def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>;
-def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>;
-def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>;
-def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>;
-def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
} // End let SubtargetPredicate = isGCN
@@ -1093,22 +1226,98 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OF
// MTBUF Patterns
//===----------------------------------------------------------------------===//
-// TBUFFER_STORE_FORMAT_*, addr64=0
-class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Pseudo opcode> : Pat<
- (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
- i32:$soffset, imm:$inst_offset, imm:$dfmt,
- imm:$nfmt, imm:$offen, imm:$idxen,
- imm:$glc, imm:$slc, imm:$tfe),
- (opcode
- $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
- (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
- (as_i1imm $slc), (as_i1imm $tfe), $soffset)
->;
+//===----------------------------------------------------------------------===//
+// tbuffer_load/store_format patterns
+//===----------------------------------------------------------------------===//
+
+multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
-def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
-def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
-def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
-def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
+
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32, "TBUFFER_LOAD_FORMAT_X">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
+
+multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i8imm $dfmt),
+ (as_i8imm $nfmt), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
+ imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
+ $vdata,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
+
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32, "TBUFFER_STORE_FORMAT_X">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
} // End let Predicates = [isGCN]
@@ -1224,21 +1433,44 @@ def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
MTBUF_Real<ps>,
+ Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
let AssemblerPredicate=isSICI;
let DecoderNamespace="SICI";
- bits<1> addr64;
- let Inst{15} = addr64;
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{15} = ps.addr64;
let Inst{18-16} = op;
+ let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
+ let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>;
-def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>;
-def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>;
-def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>;
-def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>;
+multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
+ def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
+//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
//===----------------------------------------------------------------------===//
// CI
@@ -1350,16 +1582,39 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
MTBUF_Real<ps>,
+ Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
let AssemblerPredicate=isVI;
let DecoderNamespace="VI";
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
+ let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
+ let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>;
-def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>;
-def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>;
-def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>;
-def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>;
+multiclass MTBUF_Real_AllAddr_vi<bits<4> op> {
+ def _OFFSET_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>;
+//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>;
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 2ff9c3f3190..7c31c8e397b 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -231,6 +231,24 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
O << " vm";
}
+void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " dfmt:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " nfmt:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
+}
+
void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI) {
switch (RegNo) {
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index f7b9c88e20f..7bbf99a85f4 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -88,6 +88,10 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNFMT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3563eb365b9..880c0dda945 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3295,6 +3295,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec: {
@@ -3320,7 +3322,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc
Op.getOperand(6) // slc
};
- MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
@@ -3335,6 +3336,29 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
}
+ case Intrinsic::amdgcn_tbuffer_load: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // voffset
+ Op.getOperand(5), // soffset
+ Op.getOperand(6), // offset
+ Op.getOperand(7), // dfmt
+ Op.getOperand(8), // nfmt
+ Op.getOperand(9), // glc
+ Op.getOperand(10) // slc
+ };
+
+ EVT VT = Op.getOperand(2).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad,
+ VT.getStoreSize(), VT.getStoreSize());
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
// Basic sample.
case Intrinsic::amdgcn_image_sample:
case Intrinsic::amdgcn_image_sample_cl:
@@ -3400,10 +3424,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
@@ -3470,33 +3494,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
Op.getOperand(2), Op.getOperand(3));
}
- case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDValue Ops[] = {
- Chain,
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10),
- Op.getOperand(11),
- Op.getOperand(12),
- Op.getOperand(13),
- Op.getOperand(14)
- };
-
- EVT VT = Op.getOperand(3).getValueType();
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
case AMDGPUIntrinsic::AMDGPU_kill: {
SDValue Src = Op.getOperand(2);
if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
@@ -3512,7 +3509,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
- const MachineFunction &MF = DAG.getMachineFunction();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
if (WGSize <= ST.getWavefrontSize())
@@ -3521,6 +3517,76 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
return SDValue();
};
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+
+ // Extract vindex and voffset from vaddr as appropriate
+ const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
+ const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11));
+ SDValue VAddr = Op.getOperand(5);
+
+ SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+
+ assert(!(OffEn->isOne() && IdxEn->isOne()) &&
+ "Legacy intrinsic doesn't support both offset and index - use new version");
+
+ SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
+ SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
+
+ // Deal with the vec-3 case
+ const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4));
+ auto Opcode = NumChannels->getZExtValue() == 3 ?
+ AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
+
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(3), // vdata
+ Op.getOperand(2), // rsrc
+ VIndex,
+ VOffset,
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // inst_offset
+ Op.getOperand(8), // dfmt
+ Op.getOperand(9), // nfmt
+ Op.getOperand(12), // glc
+ Op.getOperand(13), // slc
+ };
+
+ const ConstantSDNode *tfe = cast<ConstantSDNode>(Op.getOperand(14));
+ assert(tfe->getZExtValue() == 0 &&
+ "Value of tfe other than zero is unsupported");
+
+ EVT VT = Op.getOperand(3).getValueType();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(Opcode, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+
+ case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // voffset
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // offset
+ Op.getOperand(8), // dfmt
+ Op.getOperand(9), // nfmt
+ Op.getOperand(10), // glc
+ Op.getOperand(11) // slc
+ };
+ EVT VT = Op.getOperand(3).getValueType();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+
default:
return Op;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a8686ec2999..3b4a8b5d1e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -41,25 +41,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
- SDTypeProfile<0, 13,
- [SDTCisVT<0, v4i32>, // rsrc(SGPR)
- SDTCisVT<1, iAny>, // vdata(VGPR)
- SDTCisVT<2, i32>, // num_channels(imm)
- SDTCisVT<3, i32>, // vaddr(VGPR)
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT",
+ SDTypeProfile<1, 9,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
- SDTCisVT<5, i32>, // inst_offset(imm)
+ SDTCisVT<5, i32>, // offset(imm)
SDTCisVT<6, i32>, // dfmt(imm)
SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // offen(imm)
- SDTCisVT<9, i32>, // idxen(imm)
- SDTCisVT<10, i32>, // glc(imm)
- SDTCisVT<11, i32>, // slc(imm)
- SDTCisVT<12, i32> // tfe(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32> // slc(imm)
]>,
- [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
+def SDTtbuffer_store : SDTypeProfile<0, 10,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32> // slc(imm)
+ ]>;
+
+def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
+ SDTtbuffer_store,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+
def SDTBufferLoad : SDTypeProfile<1, 5,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
@@ -547,6 +563,9 @@ def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
+def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
+def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
+
def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
OpenPOWER on IntegriCloud