diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-08-02 19:31:14 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-08-02 19:31:14 +0000 |
commit | 8a482b33fed526b17a63e4539ca3036a89aea579 (patch) | |
tree | 246d55a61c69f1a3d98cb04230b0505da4058b39 /llvm/lib | |
parent | 3a9f2a5a8d1f3a01086b2816359f76479d1ec58e (diff) | |
download | bcm5719-llvm-8a482b33fed526b17a63e4539ca3036a89aea579.tar.gz bcm5719-llvm-8a482b33fed526b17a63e4539ca3036a89aea579.zip |
AMDGPU: Stay in WQM for non-intrinsic stores
Summary:
Two types of stores are possible in pixel shaders: stores to memory that are
explicitly requested at the API level, and stores that are an implementation
detail of register spilling or lowering of arrays.
For the first kind of store, we must ensure that helper pixels have no effect,
and hence whole quad mode (WQM) must be disabled. The second kind of store must
always be executed, because the written value may be loaded again in a way that
is relevant for helper pixels as well -- and there are no externally visible
effects anyway.
This is a candidate for the 3.9 release branch.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, kzhuravl, llvm-commits
Differential Revision: https://reviews.llvm.org/D22675
llvm-svn: 277504
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 9 |
6 files changed, 33 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 54efdc0a046..f4b04e3631a 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -41,7 +41,8 @@ enum { WQM = 1 << 22, VGPRSpill = 1 << 23, VOPAsmPrefer32Bit = 1 << 24, - Gather4 = 1 << 25 + Gather4 = 1 << 25, + DisableWQM = 1 << 26 }; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 4a9d8dbfaf7..76412051fff 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,8 @@ class InstSI <dag outs, dag ins, string asm = "", field bits<1> DS = 0; field bits<1> MIMG = 0; field bits<1> FLAT = 0; + + // Whether WQM _must_ be enabled for this instruction. field bits<1> WQM = 0; field bits<1> VGPRSpill = 0; @@ -50,6 +52,9 @@ class InstSI <dag outs, dag ins, string asm = "", field bits<1> Gather4 = 0; + // Whether WQM _must_ be disabled for this instruction. + field bits<1> DisableWQM = 0; + // These need to be kept in sync with the enum in SIInstrFlags. 
let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -81,6 +86,7 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{23} = VGPRSpill; let TSFlags{24} = VOPAsmPrefer32Bit; let TSFlags{25} = Gather4; + let TSFlags{26} = DisableWQM; let SchedRW = [Write32Bit]; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 7ed7c839576..4503466ca33 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -340,6 +340,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::WQM; } + static bool isDisableWQM(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; + } + + bool isDisableWQM(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; + } + static bool isVGPRSpill(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 63de74188ed..bbe1b5a4fd3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2723,6 +2723,10 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <0>; + let DisableWQM = 1 in { + def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>; + } + let addr64 = 0, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -2793,7 +2797,8 @@ multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins, multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, ValueType vt, SDPatternOperator atomic> { - let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in { + let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1, + DisableWQM = 1 in { // No return variants let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in { @@ -3197,6 +3202,7 @@ class MIMG_Store_Helper <bits<7> op, string asm, let 
mayStore = 1; let hasSideEffects = 1; let hasPostISelHook = 0; + let DisableWQM = 1; } multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm, @@ -3228,6 +3234,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc, let mayStore = 1; let hasSideEffects = 1; let hasPostISelHook = 0; + let DisableWQM = 1; let Constraints = "$vdst = $vdata"; let AsmMatchConverter = "cvtMIMGAtomic"; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index f8db0b7f4bb..f6c2719dd84 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2050,7 +2050,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _OFFSET) $vdata, $rsrc, $soffset, (as_i16imm $offset), + (!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2058,7 +2058,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _IDXEN) $vdata, $vindex, $rsrc, $soffset, + (!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2067,7 +2067,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _OFFEN) $vdata, $voffset, $rsrc, $soffset, + (!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; @@ -2076,7 +2076,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), - (!cast<MUBUF>(opcode # _BOTHEN) + (!cast<MUBUF>(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index cb35a054166..c8bfc5aa460 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -185,7 +185,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (TII->isWQM(Opcode) || TII->isDS(Opcode)) { Flags = StateWQM; - } else if (MI.mayStore() && TII->usesVM_CNT(MI)) { + } else if (TII->isDisableWQM(MI)) { Flags = StateExact; } else { // Handle export instructions with the exec mask valid flag set @@ -237,9 +237,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI, InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references BlockInfo &BI = Blocks[MBB]; - // Control flow-type instructions that are followed by WQM computations - // must themselves be in WQM. - if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) { + // Control flow-type instructions and stores to temporary memory that are + // followed by WQM computations must themselves be in WQM. + if ((II.OutNeeds & StateWQM) && !II.Needs && + (MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) { Instructions[&MI].Needs = StateWQM; II.Needs = StateWQM; } |