diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 21 |
4 files changed, 14 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 341ef73a21c..9938eeaa528 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -458,13 +458,6 @@ def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>; def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>; def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>; -def FeatureEnableHugePrivateBuffer : SubtargetFeature< - "huge-private-buffer", - "EnableHugePrivateBuffer", - "true", - "Enable private/scratch buffer sizes greater than 128 GB" ->; - def FeatureDumpCode : SubtargetFeature <"DumpCode", "DumpCode", "true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index a88218f68b5..09b806bd06a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -190,7 +190,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, EnableCuMode(false), TrapHandler(false), - EnableHugePrivateBuffer(false), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), EnableSIScheduler(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 1ef72622980..34166aacf41 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -299,7 +299,6 @@ protected: bool TrapHandler; // Used as options. - bool EnableHugePrivateBuffer; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; bool EnableSIScheduler; @@ -377,6 +376,9 @@ private: SITargetLowering TLInfo; SIFrameLowering FrameLowering; + // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 
+ static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); + public: GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM); @@ -436,6 +438,11 @@ public: return Log2_32(WavefrontSize); } + /// Return the number of high bits known to be zero for a frame index. + unsigned getKnownHighZeroBitsForFrameIndex() const { + return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); + } + int getLDSBankCount() const { return LDSBankCount; } @@ -526,10 +533,6 @@ public: return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } - bool enableHugePrivateBuffer() const { - return EnableHugePrivateBuffer; - } - bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c4c0e4047fc..c2cda5ef4d7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -93,12 +93,6 @@ static cl::opt<bool> EnableVGPRIndexMode( cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false)); -static cl::opt<unsigned> AssumeFrameIndexHighZeroBits( - "amdgpu-frame-index-zero-bits", - cl::desc("High bits of frame index assumed to be zero"), - cl::init(5), - cl::ReallyHidden); - static cl::opt<bool> DisableLoopAlignment( "amdgpu-disable-loop-alignment", cl::desc("Do not align and prefetch loops"), @@ -2059,13 +2053,14 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); - if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) { + if (Arg.Flags.isSRet()) { // The return object should be reasonably addressable. // FIXME: This helps when the return is a real sret. If it is a // automatically inserted sret (i.e. CanLowerReturn returns false), an // extra copy is inserted in SelectionDAGBuilder which obscures this. 
- unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits; + unsigned NumBits + = 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex(); Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits))); } @@ -9970,14 +9965,10 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts, DAG, Depth); - if (getSubtarget()->enableHugePrivateBuffer()) - return; - - // Technically it may be possible to have a dispatch with a single workitem - // that uses the full private memory size, but that's not really useful. We - // can't use vaddr in MUBUF instructions if we don't know the address + // Set the high bits to zero based on the maximum allowed scratch size per + // wave. We can't use vaddr in MUBUF instructions if we don't know the address // calculation won't overflow, so assume the sign bit is never set. - Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits); + Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex()); } unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { |

