Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 32
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 25
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 296
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 8
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 41
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18
14 files changed, 308 insertions, 155 deletions
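
The patch threads a new swz ("swizzled buffer") operand through the MUBUF and MTBUF operand lists, and the buffer intrinsics' former cachepolicy immediate is now treated as a wider auxiliary value. As the extractGLC/extractSLC/extractDLC/extractSWZ helpers in the AMDGPUInstructionSelector.cpp hunk below show, that immediate packs glc in bit 0, slc in bit 1, dlc in bit 2 and swz in bit 3. The standalone C++ sketch below only restates that bit layout for reference; packAuxiliaryData is an illustrative helper and is not part of the patch.

#include <cassert>

// Bit layout of the buffer-intrinsic auxiliary immediate as decoded by the
// extract* helpers in this patch: glc = bit 0, slc = bit 1, dlc = bit 2,
// swz = bit 3.
static unsigned extractGLC(unsigned Aux) { return Aux & 1; }
static unsigned extractSLC(unsigned Aux) { return (Aux >> 1) & 1; }
static unsigned extractDLC(unsigned Aux) { return (Aux >> 2) & 1; }
static unsigned extractSWZ(unsigned Aux) { return (Aux >> 3) & 1; }

// Illustrative only (not in the patch): pack individual flags back into an
// auxiliary immediate.
static unsigned packAuxiliaryData(bool GLC, bool SLC, bool DLC, bool SWZ) {
  return unsigned(GLC) | (unsigned(SLC) << 1) | (unsigned(DLC) << 2) |
         (unsigned(SWZ) << 3);
}

int main() {
  unsigned Aux = packAuxiliaryData(/*GLC=*/true, /*SLC=*/false,
                                   /*DLC=*/false, /*SWZ=*/true);
  assert(extractGLC(Aux) == 1 && extractSLC(Aux) == 0 &&
         extractDLC(Aux) == 0 && extractSWZ(Aux) == 1);
  return 0;
}
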
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b2491ebc6f4..c74a361b2c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -186,10 +186,11 @@ private: bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, - SDValue &SLC, SDValue &TFE, SDValue &DLC) const; + SDValue &SLC, SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; @@ -202,7 +203,7 @@ private: bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, @@ -1313,7 +1314,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { // Subtarget prefers to use flat instruction if (Subtarget->useFlatForGlobal()) return false; @@ -1326,6 +1328,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); + SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1); Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -1405,7 +1408,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE, - SDValue &DLC) const { + SDValue &DLC, SDValue &SWZ) const { SDValue Ptr, Offen, Idxen, Addr64; // addr64 bit was removed for volcanic islands. 
@@ -1413,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return false; if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; ConstantSDNode *C = cast<ConstantSDNode>(Addr64); @@ -1435,9 +1438,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &Offset, SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, SWZ; - return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ); } static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { @@ -1562,13 +1565,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { SDValue Ptr, VAddr, Offen, Idxen, Addr64; const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; if (!cast<ConstantSDNode>(Offen)->getSExtValue() && @@ -1590,16 +1594,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset ) const { - SDValue GLC, SLC, TFE, DLC; + SDValue GLC, SLC, TFE, DLC, SWZ; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); } bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const { - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, SWZ; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); } template <bool IsSigned> diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5480eb5595a..c5e60ed77be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -762,16 +762,20 @@ static bool isZero(Register Reg, MachineRegisterInfo &MRI) { return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; } -static unsigned extractGLC(unsigned CachePolicy) { - return CachePolicy & 1; +static unsigned extractGLC(unsigned AuxiliaryData) { + return AuxiliaryData & 1; } -static unsigned extractSLC(unsigned CachePolicy) { - return (CachePolicy >> 1) & 1; +static unsigned extractSLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 1) & 1; } -static unsigned extractDLC(unsigned CachePolicy) { - return (CachePolicy >> 2) & 1; +static unsigned extractDLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 2) & 1; +} + +static unsigned extractSWZ(unsigned AuxiliaryData) { + return (AuxiliaryData >> 3) & 1; } // Returns Base register, constant offset, and offset def point. 
@@ -970,7 +974,7 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, Register RSrc = MI.getOperand(2).getReg(); Register VOffset = MI.getOperand(3).getReg(); Register SOffset = MI.getOperand(4).getReg(); - unsigned CachePolicy = MI.getOperand(5).getImm(); + unsigned AuxiliaryData = MI.getOperand(5).getImm(); unsigned ImmOffset; unsigned TotalOffset; @@ -994,10 +998,11 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, MIB.addUse(RSrc) .addUse(SOffset) .addImm(ImmOffset) - .addImm(extractGLC(CachePolicy)) - .addImm(extractSLC(CachePolicy)) + .addImm(extractGLC(AuxiliaryData)) + .addImm(extractSLC(AuxiliaryData)) .addImm(0) // tfe: FIXME: Remove from inst - .addImm(extractDLC(CachePolicy)) + .addImm(extractDLC(AuxiliaryData)) + .addImm(extractSWZ(AuxiliaryData)) .addMemOperand(MMO); MI.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b946c308cf1..94d1d350dfd 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -143,6 +143,7 @@ public: ImmTyDLC, ImmTyGLC, ImmTySLC, + ImmTySWZ, ImmTyTFE, ImmTyD16, ImmTyClampSI, @@ -328,6 +329,7 @@ public: bool isDLC() const { return isImmTy(ImmTyDLC); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } + bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } @@ -820,6 +822,7 @@ public: case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; + case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; case ImmTyFORMAT: OS << "FORMAT"; break; @@ -6037,6 +6040,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 40887a3c56e..c9e8abad7c3 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">; -def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">; +def MUBUFAddr64 : ComplexPattern<i64, 9, "SelectMUBUFAddr64">; def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>; def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>; -def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">; +def MUBUFOffset : ComplexPattern<i64, 8, "SelectMUBUFOffset">; def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; @@ -54,6 +54,17 @@ class MTBUFAddr64Table <bit is_addr64, string Name> { // MTBUF classes 
//===----------------------------------------------------------------------===// +class MTBUFGetBaseOpcode<string Op> { + string ret = !subst("FORMAT_XY", "FORMAT_X", + !subst("FORMAT_XYZ", "FORMAT_X", + !subst("FORMAT_XYZW", "FORMAT_X", Op))); +} + +class getMTBUFElements<string Op> { + int ret = 1; +} + + class MTBUF_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> : InstSI<outs, ins, "", pattern>, @@ -67,6 +78,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, string Mnemonic = opName; string AsmOperands = asmOps; + Instruction Opcode = !cast<Instruction>(NAME); + Instruction BaseOpcode = !cast<Instruction>(MTBUFGetBaseOpcode<NAME>.ret); + let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; @@ -90,6 +104,7 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, bits<1> has_offset = 1; bits<1> has_slc = 1; bits<1> has_tfe = 1; + bits<4> elements = 0; } class MTBUF_Real <MTBUF_Pseudo ps> : @@ -126,17 +141,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList, RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc), + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc) + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc), + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc) + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -181,51 +196,54 @@ class MTBUF_SetupAddr<int addrKind> { class MTBUF_Load_Pseudo <string opName, int addrKind, RegisterClass vdataClass, + int elems, list<dag> pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MTBUF_Pseudo<opName, (outs vdataClass:$vdata), getMTBUFIns<addrKindCopy>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let mayLoad = 1; let mayStore = 0; + let elements = elems; } multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass, - ValueType load_vt = i32, + int elems, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { - def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, + def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, - i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>, + i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, + def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, - i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>, + i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, 
MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; - def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; - def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; + def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>; + def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>; - def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; - def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; - def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>; + def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; + def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>; + def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>; } } class MTBUF_Store_Pseudo <string opName, int addrKind, RegisterClass vdataClass, + int elems, list<dag> pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, @@ -233,39 +251,40 @@ class MTBUF_Store_Pseudo <string opName, : MTBUF_Pseudo<opName, (outs), getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; let mayLoad = 0; let mayStore = 1; + let elements = elems; } multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass, - ValueType store_vt = i32, + int elems, ValueType store_vt = i32, SDPatternOperator st = null_frag> { - def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, + def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc))]>, + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, + def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc))]>, + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; - def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; - def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; + def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; + def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>; + def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>; - def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; - def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; - def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, 
vdataClass>; + def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>; + def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; + def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>; + def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>; } } @@ -393,7 +412,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList, ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc)) + !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz)) ); } @@ -465,7 +484,7 @@ class MUBUF_Load_Pseudo <string opName, !con(getMUBUFIns<addrKindCopy, [], isLds>.ret, !if(HasTiedDest, (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in), (ins))), " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc", + !if(isLds, " lds", "$tfe") # "$dlc" # "$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -483,15 +502,15 @@ class MUBUF_Load_Pseudo <string opName, } class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; class MUBUF_Addr64_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { @@ -542,7 +561,7 @@ class MUBUF_Store_Pseudo <string opName, : MUBUF_Pseudo<opName, (outs), getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -558,12 +577,12 @@ multiclass MUBUF_Pseudo_Stores<string opName, def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt, [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<0, NAME>; def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, store_vt, [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>; @@ -581,8 +600,8 @@ multiclass MUBUF_Pseudo_Stores<string 
opName, class MUBUF_Pseudo_Store_Lds<string opName> : MUBUF_Pseudo<opName, (outs), - (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc), - " $srsrc, $soffset$offset lds$glc$slc"> { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz), + " $srsrc, $soffset$offset lds$glc$slc$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -1065,35 +1084,35 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < // MTBUF Instructions //===----------------------------------------------------------------------===// -defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>; -defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>; -defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>; -defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>; -defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>; -defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>; -defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>; -defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64, 2>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>; let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>; - defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>; - defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>; + defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>; + defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores 
<"tbuffer_store_format_d16_xy", VReg_64, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>; } // End HasUnpackedD16VMem. let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>; + defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>; + defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>; } // End HasPackedD16VMem. 
let SubtargetPredicate = isGFX7Plus in { @@ -1128,6 +1147,10 @@ def extract_dlc : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); }]>; +def extract_swz : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// @@ -1136,32 +1159,36 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0)), + timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0)), + timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm)), + timm:$auxiliary, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm)), + timm:$auxiliary, timm)), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1211,35 +1238,39 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0), + timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, 0), + timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), + timm:$auxiliary, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) 
$vdata, $vindex, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, - timm:$cachepolicy, timm), + timm:$auxiliary, timm), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), - $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + $rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1441,8 +1472,8 @@ def : GCNPat< class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt, PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) >; multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET, @@ -1450,12 +1481,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) >; def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } @@ -1476,8 +1507,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) >; } @@ -1500,12 +1531,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen, def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1515,12 +1546,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen, ValueType vt, PatFrag ld_frag> { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; } @@ -1560,16 +1591,16 @@ defm : 
MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D1 multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET, ValueType vt, PatFrag atomic_st> { - // Store follows atomic op convention so address is forst + // Store follows atomic op convention so address is first def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1583,8 +1614,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)), + (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) >; } @@ -1598,13 +1629,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1643,36 +1674,40 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, 0)), + timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, timm)), + timm:$format, timm:$auxiliary, timm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, 0)), + timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, timm)), + timm:$format, 
timm:$auxiliary, timm)), (!cast<MTBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1701,36 +1736,40 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, 0), + timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, timm), + timm:$format, timm:$auxiliary, timm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, - timm:$format, timm:$cachepolicy, 0), + timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, - timm:$offset, timm:$format, timm:$cachepolicy, timm), + timm:$offset, timm:$format, timm:$auxiliary, timm), (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -2397,3 +2436,22 @@ def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex { let Table = MUBUFInfoTable; let Key = ["BaseOpcode", "elements"]; } + +def MTBUFInfoTable : GenericTable { + let FilterClass = "MTBUF_Pseudo"; + let CppTypeName = "MTBUFInfo"; + let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getMTBUFOpcodeHelper"; +} + +def getMTBUFInfoFromOpcode : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["Opcode"]; +} + +def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["BaseOpcode", "elements"]; +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index a4516254397..d2ea94548df 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -196,6 +196,10 @@ void AMDGPUInstPrinter::printSLC(const MCInst *MI, 
unsigned OpNo, printNamedBit(MI, OpNo, O, "slc"); } +void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { +} + void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "tfe"); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 0f62f039763..66b70831ff9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -72,6 +72,8 @@ private: raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 45c06ebb547..22f035e7f3e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -112,6 +112,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -132,6 +133,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } @@ -157,6 +159,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -177,6 +180,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 88dec95177c..1883b28f657 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6271,7 +6271,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(4), // soffset Offsets.second, // offset - Op.getOperand(5), // cachepolicy + Op.getOperand(5), // cachepolicy, swizzled buffer DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; @@ -6289,7 +6289,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy + Op.getOperand(6), // cachepolicy, swizzled buffer DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; @@ -6338,7 +6338,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(4), // soffset Offsets.second, // offset Op.getOperand(5), // format - Op.getOperand(6), // cachepolicy + Op.getOperand(6), // cachepolicy, swizzled buffer DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; @@ -6362,7 +6362,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // cachepolicy + Op.getOperand(7), // cachepolicy, swizzled buffer DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; @@ -6832,7 +6832,7 @@ SDValue 
SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // format - Op.getOperand(8), // cachepolicy + Op.getOperand(8), // cachepolicy, swizzled buffer DAG.getTargetConstant(1, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : @@ -6857,7 +6857,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // cachepolicy + Op.getOperand(7), // cachepolicy, swizzled buffer DAG.getTargetConstant(0, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : @@ -6931,7 +6931,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy + Op.getOperand(6), // cachepolicy, swizzled buffer DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; unsigned Opc = @@ -6975,7 +6975,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(6), // soffset Offsets.second, // offset - Op.getOperand(7), // cachepolicy + Op.getOperand(7), // cachepolicy, swizzled buffer DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ? diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b6a90241d4d..d5f2902f18a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4693,6 +4693,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, MIB.addImm(TFE->getImm()); } + MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); + MIB.cloneMemRefs(MI); Addr64 = MIB; } else { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index e1b32c4964c..7473a0c64b2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, @@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, @@ -1035,6 +1035,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : 
NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; +def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>; diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 302b299765e..a78b62de715 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -640,6 +640,12 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { return false; } + // Do not merge VMEM buffer instructions with "swizzled" bit set. + int Swizzled = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz); + if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm()) + return false; + for (unsigned i = 0; i < CI.NumAddresses; i++) { // We only ever merge operations with the same base address register, so // don't bother scanning forward if there are no other uses. @@ -998,6 +1004,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { .addImm(CI.SLC0) // slc .addImm(0) // tfe .addImm(CI.DLC0) // dlc + .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI); @@ -1191,6 +1198,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { .addImm(CI.SLC0) // slc .addImm(0) // tfe .addImm(CI.DLC0) // dlc + .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); moveInstsAfter(MIB, CI.InstsToMove); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 23c357dadde..f4dd995316d 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -617,6 +617,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .cloneMemRefs(*MI); const MachineOperand *VDataIn = TII->getNamedOperand(*MI, @@ -737,6 +738,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(NewMMO); if (!IsStore && TmpReg != AMDGPU::NoRegister) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index bb4169788f4..afb2fd987af 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -137,10 +137,51 @@ struct MUBUFInfo { bool has_soffset; }; +struct MTBUFInfo { + uint16_t Opcode; + uint16_t BaseOpcode; + uint8_t elements; + bool has_vaddr; + bool has_srsrc; + bool has_soffset; +}; + +#define GET_MTBUFInfoTable_DECL +#define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" +int getMTBUFBaseOpcode(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); + return Info ? Info->BaseOpcode : -1; +} + +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { + const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); + return Info ? Info->Opcode : -1; +} + +int getMTBUFElements(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->elements : 0; +} + +bool getMTBUFHasVAddr(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? 
Info->has_vaddr : false; +} + +bool getMTBUFHasSrsrc(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_srsrc : false; +} + +bool getMTBUFHasSoffset(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_soffset : false; +} + int getMUBUFBaseOpcode(unsigned Opc) { const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); return Info ? Info->BaseOpcode : -1; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a578fd2bb6a..f78dadd447f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -264,6 +264,24 @@ LLVM_READONLY const MIMGInfo *getMIMGInfo(unsigned Opc); LLVM_READONLY +int getMTBUFBaseOpcode(unsigned Opc); + +LLVM_READONLY +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); + +LLVM_READONLY +int getMTBUFElements(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasVAddr(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSrsrc(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSoffset(unsigned Opc); + +LLVM_READONLY int getMUBUFBaseOpcode(unsigned Opc); LLVM_READONLY |
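
For context on the new searchable table at the end of BUFInstructions.td: AMDGPUBaseInfo now exposes MTBUF queries mirroring the existing MUBUF ones (getMTBUFBaseOpcode, getMTBUFOpcode, getMTBUFElements, getMTBUFHasVAddr, getMTBUFHasSrsrc, getMTBUFHasSoffset). A minimal sketch of how a backend pass might combine them follows; resizeMTBUFOpcode is a hypothetical wrapper written for illustration, not code from this commit, and it assumes compilation inside llvm/lib/Target/AMDGPU so that Utils/AMDGPUBaseInfo.h is on the include path.

#include "Utils/AMDGPUBaseInfo.h"

using namespace llvm;

// Map a tbuffer_load/store_format_* opcode to the variant that moves
// NewElems components (1..4), or return -1 if the generated table has no
// such entry.
static int resizeMTBUFOpcode(unsigned Opc, unsigned NewElems) {
  int BaseOpc = AMDGPU::getMTBUFBaseOpcode(Opc); // the *_FORMAT_X form
  if (BaseOpc == -1)
    return -1;
  return AMDGPU::getMTBUFOpcode(BaseOpc, NewElems);
}

// The remaining helpers answer structural questions about an opcode:
//   AMDGPU::getMTBUFElements(Opc)  -> number of components (1..4)
//   AMDGPU::getMTBUFHasVAddr(Opc)  -> does this variant take a vaddr operand?
//   AMDGPU::getMTBUFHasSrsrc(Opc) / AMDGPU::getMTBUFHasSoffset(Opc) -> likewise
//   for the srsrc and soffset operands.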