Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 71
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 55
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 6
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 48
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 585
-rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 264
-rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 16
-rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 22
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 15
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10
-rw-r--r-- | llvm/lib/Target/AMDGPU/SMInstructions.td | 277
16 files changed, 1071 insertions, 317 deletions
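
The patch threads a new DLC cache-policy bit through the GFX10 memory instructions. As a minimal standalone C++ sketch (not part of the patch, assuming glc sits in bit 0 per the existing extract_glc transform that this diff does not show), here is how the buffer-intrinsic cachepolicy immediate decomposes according to the extract_slc and new extract_dlc SDNodeXForms in BUFInstructions.td below:

// Toy stand-in for the extract_glc/extract_slc/extract_dlc SDNodeXForms:
// glc is assumed to be bit 0; slc is bit 1; the new GFX10 dlc bit is bit 2.
#include <cstdint>
#include <cstdio>

static unsigned extractGLC(uint64_t CachePolicy) { return CachePolicy & 1; }
static unsigned extractSLC(uint64_t CachePolicy) { return (CachePolicy >> 1) & 1; }
static unsigned extractDLC(uint64_t CachePolicy) { return (CachePolicy >> 2) & 1; }

int main() {
  // cachepolicy = 0b101 -> glc=1, slc=0, dlc=1.
  uint64_t CP = 0b101;
  std::printf("glc=%u slc=%u dlc=%u\n",
              extractGLC(CP), extractSLC(CP), extractDLC(CP));
  return 0;
}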
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index acd90b7cc9a..59a27ab1401 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -125,10 +125,10 @@ private: bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE) const; + SDValue &TFE, SDValue &DLC) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, - SDValue &SLC, SDValue &TFE) const; + SDValue &SLC, SDValue &TFE, SDValue &DLC) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; @@ -141,19 +141,19 @@ private: bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE) const; + SDValue &TFE, SDValue &DLC) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; - bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, + bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; - bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr, + bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; template <bool IsSigned> - bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, + bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, @@ -1221,7 +1221,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE) const { + SDValue &TFE, SDValue &DLC) const { // Subtarget prefers to use flat instruction if (Subtarget->useFlatForGlobal()) return false; @@ -1233,6 +1233,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, if (!SLC.getNode()) SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); + DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -1311,7 +1312,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, - SDValue &SLC, SDValue &TFE) const { + SDValue &SLC, SDValue &TFE, + SDValue &DLC) const { SDValue Ptr, Offen, Idxen, Addr64; // addr64 bit was removed for volcanic islands. 
@@ -1319,7 +1321,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return false; if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE)) + GLC, SLC, TFE, DLC)) return false; ConstantSDNode *C = cast<ConstantSDNode>(Addr64); @@ -1341,9 +1343,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &Offset, SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); - SDValue GLC, TFE; + SDValue GLC, TFE, DLC; - return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC); } static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { @@ -1468,13 +1470,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE) const { + SDValue &TFE, SDValue &DLC) const { SDValue Ptr, VAddr, Offen, Idxen, Addr64; const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE)) + GLC, SLC, TFE, DLC)) return false; if (!cast<ConstantSDNode>(Offen)->getSExtValue() && @@ -1496,57 +1498,42 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset ) const { - SDValue GLC, SLC, TFE; + SDValue GLC, SLC, TFE, DLC; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); } bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const { - SDValue GLC, TFE; + SDValue GLC, TFE, DLC; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); } template <bool IsSigned> -bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, +bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, + SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - int64_t OffsetVal = 0; - - if (Subtarget->hasFlatInstOffsets() && - CurDAG->isBaseWithConstantOffset(Addr)) { - SDValue N0 = Addr.getOperand(0); - SDValue N1 = Addr.getOperand(1); - int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); - - if ((IsSigned && isInt<13>(COffsetVal)) || - (!IsSigned && isUInt<12>(COffsetVal))) { - Addr = N0; - OffsetVal = COffsetVal; - } - } - - VAddr = Addr; - Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); - SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); - - return true; + return static_cast<const SITargetLowering*>(getTargetLowering())-> + SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); } -bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, +bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, + SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC); + return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC); } -bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr, +bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N, + SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return SelectFlatOffset<true>(Addr, VAddr, Offset, 
SLC); + return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC); } bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9115f3040a2..409fbfa22f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2886,6 +2886,61 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { return true; } +// Find a load or store from corresponding pattern root. +// Roots may be build_vector, bitconvert or their combinations. +static MemSDNode* findMemSDNode(SDNode *N) { + N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); + if (MemSDNode *MN = dyn_cast<MemSDNode>(N)) + return MN; + assert(isa<BuildVectorSDNode>(N)); + for (SDValue V : N->op_values()) + if (MemSDNode *MN = + dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V))) + return MN; + llvm_unreachable("cannot find MemSDNode in the pattern!"); +} + +bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, + SelectionDAG &DAG, + SDNode *N, + SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + const GCNSubtarget &ST = + DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); + int64_t OffsetVal = 0; + + if (ST.hasFlatInstOffsets() && + (!ST.hasFlatSegmentOffsetBug() || + findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && + DAG.isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { + if ((IsSigned && isInt<12>(COffsetVal)) || + (!IsSigned && isUInt<11>(COffsetVal))) { + Addr = N0; + OffsetVal = COffsetVal; + } + } else { + if ((IsSigned && isInt<13>(COffsetVal)) || + (!IsSigned && isUInt<12>(COffsetVal))) { + Addr = N0; + OffsetVal = COffsetVal; + } + } + } + + VAddr = Addr; + Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16); + SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1); + + return true; +} + // Replace load of an illegal type with a store of a bitcast to a friendlier // type. 
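
The new AMDGPUTargetLowering::SelectFlatOffset above narrows the foldable FLAT immediate offset on GFX10 to signed 12-bit (unsigned 11-bit for the plain FLAT segment), versus signed 13-bit (unsigned 12-bit) on earlier subtargets with flat offsets. A standalone sketch of just that legality test, with isIntN/isUIntN as hand-rolled stand-ins for llvm::isInt<N>/isUInt<N>:

#include <cstdint>

// Stand-ins for llvm::isInt<N> / llvm::isUInt<N>.
static bool isIntN(unsigned N, int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}
static bool isUIntN(unsigned N, int64_t V) {
  return V >= 0 && V < (INT64_C(1) << N);
}

// Mirrors the range check in SelectFlatOffset.
static bool isLegalFlatOffset(int64_t Offset, bool IsSigned, bool IsGFX10) {
  if (IsGFX10)
    return IsSigned ? isIntN(12, Offset) : isUIntN(11, Offset);
  return IsSigned ? isIntN(13, Offset) : isUIntN(12, Offset);
}

int main() {
  // 2048 fits the pre-GFX10 unsigned 12-bit field but not GFX10's 11-bit one.
  bool OK = isLegalFlatOffset(2048, /*IsSigned=*/false, /*IsGFX10=*/false) &&
            !isLegalFlatOffset(2048, /*IsSigned=*/false, /*IsGFX10=*/true);
  return OK ? 0 : 1;
}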
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 93240d8f1a5..74d5d80ee68 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -323,6 +323,10 @@ public: } AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + + bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N, + SDValue Addr, SDValue &VAddr, SDValue &Offset, + SDValue &SLC) const; }; namespace AMDGPUISD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f35c2a1a259..c0a85498459 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -356,7 +356,8 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { .add(I.getOperand(0)) .addImm(0) // offset .addImm(0) // glc - .addImm(0); // slc + .addImm(0) // slc + .addImm(0); // dlc // Now that we selected an opcode, we need to constrain the register @@ -532,7 +533,8 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { .addReg(PtrReg) .addImm(0) // offset .addImm(0) // glc - .addImm(0); // slc + .addImm(0) // slc + .addImm(0); // dlc bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); I.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 4c20cd2f0d3..e963bc8207f 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -139,6 +139,7 @@ public: ImmTyInstOffset, ImmTyOffset0, ImmTyOffset1, + ImmTyDLC, ImmTyGLC, ImmTySLC, ImmTyTFE, @@ -314,6 +315,7 @@ public: bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isLDS() const { return isImmTy(ImmTyLDS); } + bool isDLC() const { return isImmTy(ImmTyDLC); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } @@ -676,6 +678,7 @@ public: case ImmTyInstOffset: OS << "InstOffset"; break; case ImmTyOffset0: OS << "Offset0"; break; case ImmTyOffset1: OS << "Offset1"; break; + case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; case ImmTyTFE: OS << "TFE"; break; @@ -1184,6 +1187,7 @@ public: void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); + AMDGPUOperand::Ptr defaultDLC() const; AMDGPUOperand::Ptr defaultGLC() const; AMDGPUOperand::Ptr defaultSLC() const; @@ -2303,13 +2307,26 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } - if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { + if (TSFlags & SIInstrFlags::FLAT) { // FIXME: Produces error without correct column reported. - auto OpNum = - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); + auto Opcode = Inst.getOpcode(); + auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); + const auto &Op = Inst.getOperand(OpNum); - if (Op.getImm() != 0) + if (!hasFlatOffsets() && Op.getImm() != 0) return Match_InvalidOperand; + + // GFX10: Address offset is 12-bit signed byte offset. 
Must be positive for + // FLAT segment. For FLAT segment MSB is ignored and forced to zero. + if (isGFX10()) { + if (TSFlags & SIInstrFlags::IsNonFlatSeg) { + if (!isInt<12>(Op.getImm())) + return Match_InvalidOperand; + } else { + if (!isUInt<11>(Op.getImm())) + return Match_InvalidOperand; + } + } } return Match_Success; @@ -3887,6 +3904,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, } } + if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) + return MatchOperand_ParseFail; + Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); return MatchOperand_Success; } @@ -5101,6 +5121,10 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { // mubuf //===----------------------------------------------------------------------===// +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); +} + AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); } @@ -5177,6 +5201,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, if (!IsLdsOpcode) { // tfe is not legal with lds opcodes addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } + + if (isGFX10()) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); } void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { @@ -5214,6 +5241,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + + if (isGFX10()) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); } //===----------------------------------------------------------------------===// @@ -5249,8 +5279,12 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, } } + bool IsGFX10 = isGFX10(); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); + if (IsGFX10) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); @@ -5353,6 +5387,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, + {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, @@ -5581,7 +5616,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); } - // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): + // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): // it has src2 register operand that is tied to dst operand // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0. 
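
On the assembler side, the hunks above register "dlc" as an optional named-bit operand and make parseNamedBit reject it on pre-GFX10 targets. A toy stand-in (illustrative names only, not the real MC parser interface) for that gating:

#include <string>

enum class MatchResult { Success, ParseFail, NoMatch };

// Sketch of the check added to parseNamedBit: "dlc" is a recognized
// named bit, but only GFX10 targets may actually parse it.
static MatchResult parseNamedBit(const std::string &Tok, bool IsGFX10) {
  if (Tok != "glc" && Tok != "slc" && Tok != "dlc")
    return MatchResult::NoMatch;
  if (Tok == "dlc" && !IsGFX10)
    return MatchResult::ParseFail;
  return MatchResult::Success;
}

int main() {
  bool OK = parseNamedBit("dlc", /*IsGFX10=*/true)  == MatchResult::Success &&
            parseNamedBit("dlc", /*IsGFX10=*/false) == MatchResult::ParseFail &&
            parseNamedBit("glc", /*IsGFX10=*/false) == MatchResult::Success;
  return OK ? 0 : 1;
}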
@@ -6031,7 +6066,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 570d7016091..ef1ccd2c1aa 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">; -def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">; +def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">; def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>; def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>; -def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; +def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">; def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; @@ -96,7 +96,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, bits<1> has_vdata = 1; bits<1> has_vaddr = 1; bits<1> has_glc = 1; + bits<1> has_dlc = 1; bits<1> glc_value = 0; // the value for glc if no such operand + bits<1> dlc_value = 0; // the value for dlc if no such operand bits<1> has_srsrc = 1; bits<1> has_soffset = 1; bits<1> has_offset = 1; @@ -119,6 +121,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> : bits<12> offset; bits<1> glc; + bits<1> dlc; bits<7> format; bits<8> vaddr; bits<8> vdata; @@ -137,17 +140,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList, RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe), + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe) + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe), + SLC:$slc, TFE:$tfe, DLC:$dlc), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe) + SLC:$slc, TFE:$tfe, DLC:$dlc) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -198,7 +201,7 @@ class MTBUF_Load_Pseudo <string opName, : MTBUF_Pseudo<opName, (outs vdataClass:$vdata), getMTBUFIns<addrKindCopy>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -213,13 
+216,13 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, - i1:$glc, i1:$slc, i1:$tfe)))]>, + i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, - i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>, + i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; @@ -244,7 +247,7 @@ class MTBUF_Store_Pseudo <string opName, : MTBUF_Pseudo<opName, (outs), getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -259,13 +262,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass, def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe))]>, + i1:$slc, i1:$tfe, i1:$dlc))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe))]>, + i1:$slc, i1:$tfe, i1:$dlc))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; @@ -323,7 +326,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, bits<1> has_vdata = 1; bits<1> has_vaddr = 1; bits<1> has_glc = 1; + bits<1> has_dlc = 1; bits<1> glc_value = 0; // the value for glc if no such operand + bits<1> dlc_value = 0; // the value for dlc if no such operand bits<1> has_srsrc = 1; bits<1> has_soffset = 1; bits<1> has_offset = 1; @@ -332,7 +337,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, bits<4> dwords = 0; } -class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> : +class MUBUF_Real <MUBUF_Pseudo ps> : InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> { let isPseudo = 0; @@ -347,6 +352,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> : bits<12> offset; bits<1> glc; + bits<1> dlc; bits<8> vaddr; bits<8> vdata; bits<7> srsrc; @@ -357,7 +363,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> : // For cache invalidation instructions. 
-class MUBUF_Invalidate <string opName, SDPatternOperator node> : +class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> : MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> { let AsmMatchConverter = ""; @@ -372,7 +378,9 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node> : let has_vdata = 0; let has_vaddr = 0; let has_glc = 0; + let has_dlc = 0; let glc_value = 0; + let dlc_value = 0; let has_srsrc = 0; let has_soffset = 0; let has_offset = 0; @@ -399,7 +407,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList, ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins), (ins TFE:$tfe)) + !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc)) ); } @@ -459,7 +467,7 @@ class MUBUF_Load_Pseudo <string opName, !con(getMUBUFIns<addrKindCopy, [], isLds>.ret, !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))), " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe"), + !if(isLds, " lds", "$tfe") # "$dlc", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -489,7 +497,7 @@ multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, !if(isLds, [], [(set load_vt:$vdata, - (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>, + (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>, MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, @@ -497,7 +505,7 @@ multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, !if(isLds, [], [(set load_vt:$vdata, - (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>, + (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>, MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>; def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>; @@ -530,7 +538,7 @@ class MUBUF_Store_Pseudo <string opName, : MUBUF_Pseudo<opName, (outs), getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -546,12 +554,12 @@ multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass, def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>, MUBUFAddr64Table<0, NAME>; def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>, MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; @@ -637,6 +645,7 @@ class MUBUF_Atomic_Pseudo<string opName, let hasSideEffects = 1; let DisableWQM = 1; let has_glc = 0; + let has_dlc = 0; let has_tfe = 0; let maybeAtomic = 1; } @@ -655,6 +664,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let glc_value = 0; + let dlc_value = 
0; let AsmMatchConverter = "cvtMubufAtomic"; } @@ -672,6 +682,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> { let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret; let glc_value = 1; + let dlc_value = 0; let Constraints = "$vdata = $vdata_in"; let DisableEncoding = "$vdata_in"; let AsmMatchConverter = "cvtMubufAtomicReturn"; @@ -1051,6 +1062,11 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", } // End let SubtargetPredicate = isGFX7Plus +let SubtargetPredicate = isGFX10Plus in { + def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; + def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; +} // End SubtargetPredicate = isGFX10Plus + //===----------------------------------------------------------------------===// // MUBUF Patterns //===----------------------------------------------------------------------===// @@ -1063,6 +1079,10 @@ def extract_slc : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8); }]>; +def extract_dlc : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// @@ -1073,21 +1093,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$cachepolicy, imm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1096,7 +1116,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; } @@ -1144,21 +1164,23 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc 
$cachepolicy), (extract_slc $cachepolicy), 0) + (as_i16imm $offset), (extract_glc $cachepolicy), + (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$cachepolicy, imm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (as_i16imm $offset), (extract_glc $cachepolicy), + (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1167,8 +1189,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), - $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), + (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; } @@ -1322,8 +1344,8 @@ def : GCNPat< class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt, PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) >; multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET, @@ -1331,12 +1353,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) >; def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) >; } @@ -1355,8 +1377,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), - (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), + (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) >; } @@ -1377,12 +1399,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen, def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0) >; } @@ -1392,12 +1414,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen, ValueType vt, PatFrag ld_frag> { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, 
$offset, 0, 0, 0, 0, $in) >; } @@ -1435,12 +1457,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1454,8 +1476,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)), + (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) >; } @@ -1468,13 +1490,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) + (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; } @@ -1512,7 +1534,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1520,7 +1542,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, imm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1528,7 +1550,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1538,7 +1560,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; } @@ -1570,7 +1592,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc 
$cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1578,7 +1600,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, imm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1586,7 +1608,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, imm:$format, imm:$cachepolicy, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< @@ -1596,7 +1618,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0) + (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; } @@ -1626,24 +1648,18 @@ let SubtargetPredicate = HasPackedD16VMem in { //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Base ENC_MUBUF for GFX6, GFX7. +// Base ENC_MUBUF for GFX6, GFX7, GFX10. //===----------------------------------------------------------------------===// -class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> : - MUBUF_Real<op, ps>, - Enc64, - SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> { - let AssemblerPredicate=isGFX6GFX7; - let DecoderNamespace="GFX6GFX7"; - +class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> : + MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{15} = ps.addr64; let Inst{16} = !if(ps.lds, 1, 0); let Inst{24-18} = op; - let Inst{31-26} = 0x38; //encoding + let Inst{31-26} = 0x38; let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); @@ -1652,125 +1668,250 @@ class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> : let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -multiclass MUBUF_Real_AllAddr_si<bits<7> op> { - def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; - def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>; - def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; - def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; - def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; -} - -multiclass MUBUF_Real_AllAddr_Lds_si<bits<7> op> { - - def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_si">; - def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>, - MUBUFLdsTable<0, NAME # "_ADDR64_si">; - def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_si">; - def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_si">; - def _BOTHEN_si : MUBUF_Real_si 
<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_si">; - - def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_si">; - def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>, - MUBUFLdsTable<1, NAME # "_ADDR64_si">; - def _LDS_OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_si">; - def _LDS_IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_si">; - def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_si">; -} - -multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> { - def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; - def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>; - def _OFFEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; - def _IDXEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; - def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; -} - -defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_si <0x00>; -defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>; -defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>; -defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>; -defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>; -defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>; -defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>; -defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>; -defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_si <0x08>; -defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_si <0x09>; -defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_si <0x0a>; -defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_si <0x0b>; -defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_si <0x0c>; -defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>; -defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>; -defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>; -defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>; -defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>; -defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>; -defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>; -defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>; -defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_si <0x1f>; - -defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>; -defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>; -defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>; -defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>; -//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI -defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>; -defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>; -defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>; -defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>; -defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>; -defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>; -defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>; -defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>; -defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>; - -//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI -//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI -//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI -defm 
BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>; -defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>; -defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>; -defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>; -//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI -defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>; -defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>; -defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>; -defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>; -defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>; -defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>; -defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>; -defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>; -defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>; -// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI. -//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI -//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI -//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI - -def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>; -def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>; - -class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> : - MTBUF_Real<ps>, - Enc64, - SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> { - let AssemblerPredicate=isGFX6GFX7; - let DecoderNamespace="GFX6GFX7"; +class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : + Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> { + let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{25} = op{7}; +} + +class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> : + Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> { + let Inst{15} = ps.addr64; +} + +//===----------------------------------------------------------------------===// +// MUBUF - GFX10. 
+//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName, + string asmName> { + def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> { + MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName); + let AsmString = asmName # ps.AsmOperands; + } + } + multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> { + def _BOTHEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; + def _IDXEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _OFFEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _OFFSET_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + } + multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> { + def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">; + def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">; + def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">; + def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">; + + def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, + MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">; + def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, + MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">; + def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, + MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">; + def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, + MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">; + } + multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> : + MUBUF_Real_AllAddr_gfx10<op> { + def _BOTHEN_RTN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + def _IDXEN_RTN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + def _OFFEN_RTN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + def _OFFSET_RTN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + } +} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" + +defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; +defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>; +defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x020>; +defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x021>; +defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x022>; +defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x023>; +defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx10<0x024>; +defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x025>; +// FIXME-GFX10: Add following instructions: +//defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>; +//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>; +defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x080>; +defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x081>; +defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x082>; +defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x083>; +defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x084>; +defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x085>; +defm BUFFER_STORE_FORMAT_D16_XYZ : 
MUBUF_Real_AllAddr_gfx10<0x086>; +defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>; + +def BUFFER_GL0_INV_gfx10 : + MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>; +def BUFFER_GL1_INV_gfx10 : + MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>; + +//===----------------------------------------------------------------------===// +// MUBUF - GFX6, GFX7, GFX10. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in { + multiclass MUBUF_Real_gfx6<bits<8> op> { + def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>; + } +} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" + +let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { + multiclass MUBUF_Real_gfx7<bits<8> op> { + def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>; + } +} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" + +let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { + multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> { + def _ADDR64_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>; + def _BOTHEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; + def _IDXEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _OFFEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _OFFSET_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + } + multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> { + def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">; + def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>, + MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">; + def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">; + def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">; + def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">; + + def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, + MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">; + def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>, + MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">; + def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, + MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">; + def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, + MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">; + def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, + MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; + } + multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> : + MUBUF_Real_AllAddr_gfx6_gfx7<op> { + def _ADDR64_RTN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>; + def _BOTHEN_RTN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + def _IDXEN_RTN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + def _OFFEN_RTN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + def _OFFSET_RTN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, 
!cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + } +} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" + +multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> : + MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>; + +multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> : + MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>; + +multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> : + MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>; + +// FIXME-GFX6: Following instructions are available only on GFX6. +//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>; +//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomics_gfx6 <0x054>; + +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>; +defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>; +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>; +defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>; +defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>; +defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>; +defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>; +defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>; +defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>; +defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>; + +defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>; +defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>; +defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>; +defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>; +defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>; +defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>; +defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>; +defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>; +defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>; +defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>; +defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>; +defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>; +// FIXME-GFX6-GFX7-GFX10: Add following instructions: +//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; +//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; +//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; +defm BUFFER_ATOMIC_SWAP_X2 : 
MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>; +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>; +defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>; +// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7. +// FIXME-GFX6-GFX7-GFX10: Add following instructions: +//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; +//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; +//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; + +defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>; +defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; +def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>; + +//===----------------------------------------------------------------------===// +// Base ENC_MTBUF for GFX6, GFX7, GFX10. +//===----------------------------------------------------------------------===// +class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> : + MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{15} = ps.addr64; let Inst{18-16} = op; - let Inst{22-19} = dfmt; - let Inst{25-23} = nfmt; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); @@ -1780,43 +1921,83 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> : let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } -multiclass MTBUF_Real_AllAddr_si<bits<3> op> { - def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; - def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>; - def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; - def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; - def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; -} +//===----------------------------------------------------------------------===// +// MTBUF - GFX10. 
+//===----------------------------------------------------------------------===// -defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>; -defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>; -defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>; -defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>; -defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>; -defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>; -defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>; -defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>; +class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : + Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> { + let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{25-19} = format; + let Inst{53} = op{3}; +} + +let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> { + def _BOTHEN_gfx10 : + MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; + def _IDXEN_gfx10 : + MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; + def _OFFEN_gfx10 : + MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; + def _OFFSET_gfx10 : + MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; + } +} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" + +defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>; +defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>; +defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00a>; +defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00b>; +defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x00c>; +defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x00d>; +defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00e>; +defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>; //===----------------------------------------------------------------------===// -// CI -// MTBUF - GFX6, GFX7. +// MTBUF - GFX6, GFX7, GFX10. 
//===----------------------------------------------------------------------===// -class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> : - MUBUF_Real_si<op, ps> { - let AssemblerPredicate = isGFX7Only; - let DecoderNamespace = "GFX7"; +class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> : + Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> { + let Inst{15} = ps.addr64; + let Inst{22-19} = dfmt; + let Inst{25-23} = nfmt; } -def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>; +let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { + multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> { + def _ADDR64_gfx6_gfx7 : + MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>; + def _BOTHEN_gfx6_gfx7 : + MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; + def _IDXEN_gfx6_gfx7 : + MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>; + def _OFFEN_gfx6_gfx7 : + MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>; + def _OFFSET_gfx6_gfx7 : + MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; + } +} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" + +multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> : + MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>; //===----------------------------------------------------------------------===// -// GFX8, GFX9 (VI). +// GFX8, GFX9 (VI). 
//===----------------------------------------------------------------------===// class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> : - MUBUF_Real<op, ps>, + MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> { let AssemblerPredicate = isGFX8GFX9; @@ -1866,7 +2047,7 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> { } class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : - MUBUF_Real<op, ps>, + MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> { let AssemblerPredicate=HasUnpackedD16VMem; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index bcd03585974..0196b36a95c 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>; -def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>; +def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>; +def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>; -def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>; -def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>; +def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>; +def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>; //===----------------------------------------------------------------------===// // FLAT classes @@ -51,6 +51,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins, bits<1> has_data = 1; bits<1> has_glc = 1; bits<1> glcValue = 0; + bits<1> has_dlc = 1; + bits<1> dlcValue = 0; let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); @@ -88,6 +90,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : bits<1> slc; bits<1> glc; + bits<1> dlc; // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? 
@@ -141,9 +144,9 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass, !con((ins VReg_64:$vaddr), !if(EnableSaddr, (ins SReg_64:$saddr), (ins))), (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)), - (ins GLC:$glc, SLC:$slc)), + (ins GLC:$glc, SLC:$slc, DLC:$dlc)), !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), - " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> { + " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> { let has_data = 0; let mayLoad = 1; let has_saddr = HasSaddr; @@ -164,8 +167,8 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, !con((ins VReg_64:$vaddr, vdataClass:$vdata), !if(EnableSaddr, (ins SReg_64:$saddr), (ins))), (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)), - (ins GLC:$glc, SLC:$slc)), - " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> { + (ins GLC:$glc, SLC:$slc, DLC:$dlc)), + " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -198,9 +201,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, opName, (outs regClass:$vdst), !if(EnableSaddr, - (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc), - (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)), - " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> { + (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc), + (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)), + " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> { let has_data = 0; let mayLoad = 1; let has_saddr = 1; @@ -214,9 +217,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En opName, (outs), !if(EnableSaddr, - (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc), - (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)), - " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> { + (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc), + (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)), + " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -248,6 +251,8 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, let mayStore = 1; let has_glc = 0; let glcValue = 0; + let has_dlc = 0; + let dlcValue = 0; let has_vdst = 0; let maybeAtomic = 1; } @@ -258,6 +263,7 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, let hasPostISelHook = 1; let has_vdst = 1; let glcValue = 1; + let dlcValue = 0; let PseudoInstr = NAME # "_RTN"; } @@ -492,8 +498,8 @@ defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat>; -// GFX7-only flat instructions. -let SubtargetPredicate = isGFX7Only in { +// GFX7-, GFX10-only flat instructions. 
+let SubtargetPredicate = isGFX7GFX10 in { defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>; @@ -513,7 +519,7 @@ defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>; -} // End SubtargetPredicate = isGFX7Only +} // End SubtargetPredicate = isGFX7GFX10 let SubtargetPredicate = HasFlatGlobalInsts in { defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; @@ -656,6 +662,22 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor } // End SubtargetPredicate = HasFlatScratchInsts +let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { + defm GLOBAL_ATOMIC_FCMPSWAP : + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>; + defm GLOBAL_ATOMIC_FMIN : + FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; + defm GLOBAL_ATOMIC_FMAX : + FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; + defm GLOBAL_ATOMIC_FCMPSWAP_X2 : + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>; + defm GLOBAL_ATOMIC_FMIN_X2 : + FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; + defm GLOBAL_ATOMIC_FMAX_X2 : + FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; +} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 + + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -663,51 +685,51 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor // Patterns for global loads with no offset. class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), - (inst $vaddr, $offset, 0, $slc) + (inst $vaddr, $offset, 0, 0, $slc) >; class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in), - (inst $vaddr, $offset, 0, $slc, $in) + (inst $vaddr, $offset, 0, 0, $slc, $in) >; class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in), - (inst $vaddr, $offset, 0, $slc, $in) + (inst $vaddr, $offset, 0, 0, $slc, $in) >; class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), - (inst $vaddr, $offset, 0, $slc) + (inst $vaddr, $offset, 0, 0, $slc) >; class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), - (inst $vaddr, $offset, 0, $slc) + (inst $vaddr, $offset, 0, 0, $slc) >; class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)), - (inst $vaddr, $data, $offset, 0, $slc) + (inst $vaddr, $data, $offset, 0, 0, $slc) >; class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)), - (inst $vaddr, $data, $offset, 0, $slc) + (inst $vaddr, $data, $offset, 0, 0, $slc) >; class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < // atomic store follows atomic binop convention so the address 
comes // first. (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), - (inst $vaddr, $data, $offset, 0, $slc) + (inst $vaddr, $data, $offset, 0, 0, $slc) >; class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < // atomic store follows atomic binop convention so the address comes // first. (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), - (inst $vaddr, $data, $offset, 0, $slc) + (inst $vaddr, $data, $offset, 0, 0, $slc) >; class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, @@ -1108,3 +1130,193 @@ defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; + + +//===----------------------------------------------------------------------===// +// GFX10. +//===----------------------------------------------------------------------===// + +class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : + FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { + let AssemblerPredicate = isGFX10Plus; + let DecoderNamespace = "GFX10"; + + let Inst{11-0} = {offset{12}, offset{10-0}}; + let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); + let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); + let Inst{55} = 0; +} + + +multiclass FLAT_Real_Base_gfx10<bits<7> op> { + def _gfx10 : + FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; +} + +multiclass FLAT_Real_RTN_gfx10<bits<7> op> { + def _RTN_gfx10 : + FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; +} + +multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { + def _SADDR_gfx10 : + FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; +} + +multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { + def _SADDR_RTN_gfx10 : + FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; +} + + +multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : + FLAT_Real_Base_gfx10<op>, + FLAT_Real_SADDR_gfx10<op>; + +multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : + FLAT_Real_Base_gfx10<op>, + FLAT_Real_RTN_gfx10<op>; + +multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : + FLAT_Real_AllAddr_gfx10<op>, + FLAT_Real_RTN_gfx10<op>, + FLAT_Real_SADDR_RTN_gfx10<op>; + + +// ENC_FLAT. 
+defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; +defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; +defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; +defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; +defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; +defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; +defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; +defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; +defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; +defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; +defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; +defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; +defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; +defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; +defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; +defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; +defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; +defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; +defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; +defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; +defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; +defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; +defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; +defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; +defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; +defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; +defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; +defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; +defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; +defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; +defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; +defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; +defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; +defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; +defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; +defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; +defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; +defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; +defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; +defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; +defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; +defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; +defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; +defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; +defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; +defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; +defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; +defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; +defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; +defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; +defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; +defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; +defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; +defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; + + +// ENC_FLAT_GLBL. 
+defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; +defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; +defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; +defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; +defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; +defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; +defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; +defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; +defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; +defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; +defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; +defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; +defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; +defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; +defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; +defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; +defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; +defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; +defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; +defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; +defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; +defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; +defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; +defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; +defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; +defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; +defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; +defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; +defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; +defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; +defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; +defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; +defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; +defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; +defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; +defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; +defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; +defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; +defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; +defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; +defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; +defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; +defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; +defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; +defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; +defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; +defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; +defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; +defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; +defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; +defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; +defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; +defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>; +defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; + + +// ENC_FLAT_SCRATCH. 
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; +defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; +defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; +defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; +defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; +defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; +defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; +defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; +defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; +defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; +defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; +defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; +defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; +defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; +defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; +defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; +defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; +defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; +defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; +defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; +defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; +defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 1ed9129e3bc..b8bd426f3ea 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -72,8 +72,14 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, } void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { - O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); + // GFX10: Address offset is 12-bit signed byte offset. + if (AMDGPU::isGFX10(STI)) { + O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm())); + } else { + O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); + } } void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, @@ -128,7 +134,7 @@ void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo, uint16_t Imm = MI->getOperand(OpNo).getImm(); if (Imm != 0) { O << ((OpNo == 0)? 
"offset:" : " offset:"); - printS13ImmDecOperand(MI, OpNo, O); + printS13ImmDecOperand(MI, OpNo, STI, O); } } @@ -173,6 +179,12 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "gds"); } +void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + if (AMDGPU::isGFX10(STI)) + printNamedBit(MI, OpNo, O, "dlc"); +} + void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "glc"); diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index 5f5a7fe5c16..228317a9479 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -41,7 +41,8 @@ private: void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, @@ -67,6 +68,8 @@ private: const MCSubtargetInfo &STI, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp index 43023b3df70..5b834c8de13 100644 --- a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -197,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB, // Atomics dont have a GLC, so omit the field if not there. if (Glc) NewGlob->addOperand(MF, *Glc); + + MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc); + if (DLC) + NewGlob->addOperand(MF, *DLC); + NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc)); // _D16 have an vdst_in operand, copy it in. MachineOperand *VDstInOp = TII->getNamedOperand(MI, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 89f6247dada..d2dd3491f86 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -70,6 +70,24 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, // Do a 64-bit pointer add. if (ST.flatScratchIsPointer()) { + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) + .addReg(FlatScrInitLo) + .addReg(ScratchWaveOffsetReg); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) + .addReg(FlatScrInitHi) + .addImm(0); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). + addReg(FlatScrInitLo). + addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | + (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). + addReg(FlatScrInitHi). 
+ addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | + (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); + return; + } + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) .addReg(FlatScrInitLo) .addReg(ScratchWaveOffsetReg); @@ -80,6 +98,8 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, return; } + assert(ST.getGeneration() < AMDGPUSubtarget::GFX10); + // Copy the size in bytes. BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) .addReg(FlatScrInitHi, RegState::Kill); @@ -423,6 +443,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, .addReg(Rsrc01) .addImm(EncodedOffset) // offset .addImm(0) // glc + .addImm(0) // dlc .addReg(ScratchRsrcReg, RegState::ImplicitDefine) .addMemOperand(MMO); return; @@ -463,6 +484,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addImm(0) // offset .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0983da17d87..84792c31214 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4166,6 +4166,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, getNamedOperand(MI, AMDGPU::OpName::glc)) { MIB.addImm(GLC->getImm()); } + if (const MachineOperand *DLC = + getNamedOperand(MI, AMDGPU::OpName::dlc)) { + MIB.addImm(DLC->getImm()); + } MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f550b58b5bb..eb67c22a7d6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -830,6 +830,7 @@ def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>; def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>; def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>; +def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index d663616f02d..b7541e0df62 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -131,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass { bool GLC1; bool SLC0; bool SLC1; + bool DLC0; + bool DLC1; bool UseST64; SmallVector<MachineInstr *, 8> InstsToMove; }; @@ -323,7 +325,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) { return (EltOffset0 + CI.Width0 == EltOffset1 || EltOffset1 + CI.Width1 == EltOffset0) && - CI.GLC0 == CI.GLC1 && + CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 && (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1); } @@ -637,6 +639,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); } + CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm(); + CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm(); } // Check both offsets fit in the reduced range. 
@@ -857,6 +861,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) .addImm(MergedOffset) // offset .addImm(CI.GLC0) // glc + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI); @@ -909,6 +914,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { .addImm(CI.GLC0) // glc .addImm(CI.SLC0) // slc .addImm(0) // tfe + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI); @@ -1088,9 +1094,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(std::min(CI.Offset0, CI.Offset1)) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); moveInstsAfter(MIB, CI.InstsToMove); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 642479db020..b503af4d210 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -536,6 +536,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe + .addImm(0) // dlc .cloneMemRefs(*MI); const MachineOperand *VDataIn = TII->getNamedOperand(*MI, @@ -639,6 +640,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe + .addImm(0) // dlc .addMemOperand(NewMMO); if (NumSubRegs > 1) @@ -769,6 +771,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, .addReg(MFI->getScratchRSrcReg()) // sbase .addReg(OffsetReg, RegState::Kill) // soff .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO); continue; @@ -928,9 +931,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, auto MIB = BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg) - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc + .addReg(MFI->getScratchRSrcReg()) // sbase + .addReg(OffsetReg, RegState::Kill) // soff + .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO); if (NumSubRegs > 1 && i == 0) diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index b1afaa713f4..3dc45fb2f96 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -40,6 +40,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt bits<1> has_sbase = 1; bits<1> has_sdst = 1; bit has_glc = 0; + bit has_dlc = 0; bits<1> has_offset = 1; bits<1> offset_is_imm = 0; } @@ -79,6 +80,7 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> let mayLoad = 1; let mayStore = 0; let has_glc = 1; + let has_dlc = 1; } class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []> @@ -88,6 +90,7 @@ class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern let mayLoad = 0; let mayStore = 1; let has_glc = 1; + let has_dlc = 1; let ScalarStore = 1; } @@ -108,21 +111,23 @@ multiclass SM_Pseudo_Loads<string opName, RegisterClass dstClass> { def _IMM : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins 
baseClass:$sbase, i32imm:$offset, i1imm:$glc), - " $sdst, $sbase, $offset$glc", []> { + (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), + " $sdst, $sbase, $offset$glc$dlc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; let has_glc = 1; + let has_dlc = 1; } def _SGPR : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc), - " $sdst, $sbase, $offset$glc", []> { + (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc), + " $sdst, $sbase, $offset$glc$dlc", []> { let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; let has_glc = 1; + let has_dlc = 1; } } @@ -130,8 +135,8 @@ multiclass SM_Pseudo_Stores<string opName, RegisterClass baseClass, RegisterClass srcClass> { def _IMM : SM_Store_Pseudo <opName, - (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc), - " $sdata, $sbase, $offset$glc", []> { + (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), + " $sdata, $sbase, $offset$glc$dlc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let SrcClass = srcClass; @@ -139,8 +144,8 @@ multiclass SM_Pseudo_Stores<string opName, } def _SGPR : SM_Store_Pseudo <opName, - (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc), - " $sdata, $sbase, $offset$glc", []> { + (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc), + " $sdata, $sbase, $offset$glc$dlc", []> { let BaseClass = baseClass; let SrcClass = srcClass; let PseudoInstr = opName # "_SGPR"; @@ -184,6 +189,16 @@ multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> { def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>; } +class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo< + opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins), + " $sdst", [(set i32:$sdst, (node))]> { + let hasSideEffects = 1; + let mayStore = 0; + let mayLoad = 1; + let has_sbase = 0; + let has_offset = 0; +} + //===----------------------------------------------------------------------===// // Scalar Atomic Memory Classes //===----------------------------------------------------------------------===// @@ -197,6 +212,7 @@ class SM_Atomic_Pseudo <string opName, let mayLoad = 1; let mayStore = 1; let has_glc = 1; + let has_dlc = 1; // Should these be set? 
let ScalarStore = 1; @@ -212,9 +228,9 @@ class SM_Pseudo_Atomic<string opName, SM_Atomic_Pseudo<opName, !if(isRet, (outs dataClass:$sdst), (outs)), !if(isImm, - (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset), - (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)), - !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""), + (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc), + (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)), + !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc", isRet> { let offset_is_imm = isImm; let PseudoInstr = opName # !if(isImm, @@ -272,6 +288,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads < "s_buffer_load_dwordx16", SReg_128, SReg_512 >; +let SubtargetPredicate = HasScalarStores in { defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; @@ -287,7 +304,7 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores < defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < "s_buffer_store_dwordx4", SReg_128, SReg_128 >; - +} // End SubtargetPredicate = HasScalarStores def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -297,15 +314,22 @@ def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_ } // let SubtargetPredicate = isGFX7GFX8GFX9 let SubtargetPredicate = isGFX8Plus in { +let OtherPredicates = [HasScalarStores] in { def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>; def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; +} // End OtherPredicates = [HasScalarStores] def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>; defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>; defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>; } // SubtargetPredicate = isGFX8Plus -let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in { +let SubtargetPredicate = isGFX10Plus in { +def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">; +def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>; +} // End SubtargetPredicate = isGFX10Plus + +let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in { defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>; @@ -313,7 +337,7 @@ defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_6 defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>; -} // SubtargetPredicate = HasFlatScratchInsts +} // SubtargetPredicate = HasScalarFlatScratchInsts let SubtargetPredicate = HasScalarAtomics in { @@ -375,7 +399,7 @@ defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_6 } // let SubtargetPredicate = HasScalarAtomics -let SubtargetPredicate = 
isGFX9Only in { +let SubtargetPredicate = HasScalarAtomics in { defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">; defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">; } @@ -411,13 +435,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps, SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_si : SMRD_Real_si <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc); } // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _SGPR_si : SMRD_Real_si <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -464,10 +488,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps, SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_vi <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -485,11 +509,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps, // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_vi : SMEM_Real_Store_vi <op, immPs> { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -638,7 +662,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> : let AssemblerPredicates = [isGFX7Only]; let DecoderNamespace = "GFX7"; - let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc); + let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc); let LGKM_CNT = ps.LGKM_CNT; let SMRD = ps.SMRD; @@ -718,26 +742,26 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> { // 1. IMM offset def : GCNPat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0)) >; // 2. 32-bit IMM offset on CI def : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> { + (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> { let OtherPredicates = [isGFX7Only]; } // 3. SGPR offset def : GCNPat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0)) >; // 4. 
No offset def : GCNPat < (vt (smrd_load (i64 SReg_64:$sbase))), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0)) >; } @@ -745,20 +769,20 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> { // 1. Offset as an immediate def : GCNPat < (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc))) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0)) >; // 2. 32-bit IMM offset on CI def : GCNPat < (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)), - (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> { + (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> { let OtherPredicates = [isGFX7Only]; } // 3. Offset loaded in an 32bit SGPR def : GCNPat < (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc))) + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0)) >; } @@ -801,3 +825,198 @@ def : GCNPat < >; } // let OtherPredicates = [isGFX8Plus] + +//===----------------------------------------------------------------------===// +// GFX10. +//===----------------------------------------------------------------------===// + +class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> : + SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 { + bit glc; + bit dlc; + + let AssemblerPredicates = [isGFX10Plus]; + let DecoderNamespace = "GFX10"; + + let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); + let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); + let Inst{14} = !if(ps.has_dlc, dlc, ?); + let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{25-18} = op; + let Inst{31-26} = 0x3d; + let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?); + let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding), + !if(ps.has_offset, offset{6-0}, ?)); +} + +multiclass SM_Real_Loads_gfx10<bits<8> op, string ps, + SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), + SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { + def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> { + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + } + def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> { + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + } +} + +class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> { + bits<7> sdata; + + let sdst = ?; + let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); +} + +multiclass SM_Real_Stores_gfx10<bits<8> op, string ps, + SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM), + SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> { + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo + def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> { + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + } + + def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + } +} + +defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">; +defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">; +defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">; +defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, 
"S_LOAD_DWORDX8">; +defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">; + +let SubtargetPredicate = HasScalarFlatScratchInsts in { +defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">; +defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">; +defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">; +} // End SubtargetPredicate = HasScalarFlatScratchInsts + +defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">; +defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">; +defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">; +defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">; +defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">; + +let SubtargetPredicate = HasScalarStores in { +defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">; +defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">; +defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">; +let OtherPredicates = [HasScalarFlatScratchInsts] in { +defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">; +defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">; +defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">; +} // End OtherPredicates = [HasScalarFlatScratchInsts] +defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">; +defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">; +defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">; +} // End SubtargetPredicate = HasScalarStores + +def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>; +def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>; +def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>; +def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>; +def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>; + +let SubtargetPredicate = HasScalarStores in { +def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>; +} // End SubtargetPredicate = HasScalarStores + +multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> { + def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>; +} + +defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">; +defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">; + +class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps> + : SMEM_Real_gfx10 <op, ps> { + + bits<7> sdata; + bit dlc; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + let glc = ps.glc; + + let Inst{14} = !if(ps.has_dlc, dlc, 0); + let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0}); +} + +multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> { + def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>; + def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>; + def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>; +} + +let SubtargetPredicate = HasScalarAtomics in { + +defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">; +defm 
S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">; +defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">; +defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">; +defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">; +defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">; +defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">; +defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">; +defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">; +defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">; +defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">; +defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">; +defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">; + +defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">; +defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">; +defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">; +defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">; +defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">; +defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">; +defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">; +defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">; +defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">; +defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">; +defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">; +defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">; +defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">; + +defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">; +defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">; +defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">; +defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">; +defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">; +defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">; +defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">; +defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">; +defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">; +defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">; +defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">; +defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">; +defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">; + +defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">; +defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">; +defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">; +defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">; +defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">; +defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">; +defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">; +defm 
S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">; +defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">; +defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">; +defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">; +defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">; +defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">; + +multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> { + def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>; +} + +defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">; +defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">; + +} // End SubtargetPredicate = HasScalarAtomics |