diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 9 |
6 files changed, 33 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 54efdc0a046..f4b04e3631a 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -41,7 +41,8 @@ enum { WQM = 1 << 22, VGPRSpill = 1 << 23, VOPAsmPrefer32Bit = 1 << 24, - Gather4 = 1 << 25 + Gather4 = 1 << 25, + DisableWQM = 1 << 26 }; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 4a9d8dbfaf7..76412051fff 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,8 @@ class InstSI <dag outs, dag ins, string asm = "", field bits<1> DS = 0; field bits<1> MIMG = 0; field bits<1> FLAT = 0; + + // Whether WQM _must_ be enabled for this instruction. field bits<1> WQM = 0; field bits<1> VGPRSpill = 0; @@ -50,6 +52,9 @@ class InstSI <dag outs, dag ins, string asm = "", field bits<1> Gather4 = 0; + // Whether WQM _must_ be disabled for this instruction. + field bits<1> DisableWQM = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -81,6 +86,7 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{23} = VGPRSpill; let TSFlags{24} = VOPAsmPrefer32Bit; let TSFlags{25} = Gather4; + let TSFlags{26} = DisableWQM; let SchedRW = [Write32Bit]; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 7ed7c839576..4503466ca33 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -340,6 +340,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::WQM; } + static bool isDisableWQM(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; + } + + bool isDisableWQM(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; + } + static bool isVGPRSpill(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 63de74188ed..bbe1b5a4fd3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2723,6 +2723,10 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <0>; + let DisableWQM = 1 in { + def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>; + } + let addr64 = 0, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -2793,7 +2797,8 @@ multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins, multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, ValueType vt, SDPatternOperator atomic> { - let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in { + let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1, + DisableWQM = 1 in { // No return variants let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in { @@ -3197,6 +3202,7 @@ class MIMG_Store_Helper <bits<7> op, string asm, let mayStore = 1; let hasSideEffects = 1; let hasPostISelHook = 0; + let DisableWQM = 1; } multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm, @@ -3228,6 +3234,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc, let mayStore = 1; let hasSideEffects = 1; let hasPostISelHook = 0; + let DisableWQM = 1; let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index f8db0b7f4bb..f6c2719dd84 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2050,7 +2050,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _OFFSET) $vdata, $rsrc, $soffset, (as_i16imm $offset), + (!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2058,7 +2058,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _IDXEN) $vdata, $vindex, $rsrc, $soffset, + (!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2067,7 +2067,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _OFFEN) $vdata, $voffset, $rsrc, $soffset, + (!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2076,7 +2076,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _BOTHEN) + (!cast<MUBUF>(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index cb35a054166..c8bfc5aa460 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -185,7 +185,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (TII->isWQM(Opcode) || TII->isDS(Opcode)) { Flags = StateWQM; - } else if (MI.mayStore() && TII->usesVM_CNT(MI)) { + } else if (TII->isDisableWQM(MI)) { Flags = StateExact; } else { // Handle export instructions with the exec mask valid flag set @@ -237,9 +237,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI, InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references BlockInfo &BI = Blocks[MBB]; - // Control flow-type instructions that are followed by WQM computations - // must themselves be in WQM. - if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) { + // Control flow-type instructions and stores to temporary memory that are + // followed by WQM computations must themselves be in WQM. + if ((II.OutNeeds & StateWQM) && !II.Needs && + (MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) { Instructions[&MI].Needs = StateWQM; II.Needs = StateWQM; } |