diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 36 |
1 files changed, 35 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f7fe652dbea..43c4be359f4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -94,6 +94,12 @@ static cl::opt<bool> EnableVGPRIndexMode( cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false)); +static cl::opt<unsigned> AssumeFrameIndexHighZeroBits( + "amdgpu-frame-index-zero-bits", + cl::desc("High bits of frame index assumed to be zero"), + cl::init(5), + cl::ReallyHidden); + static unsigned findFirstFreeSGPR(CCState &CCInfo) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { @@ -1600,6 +1606,17 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) { + // The return object should be reasonably addressable. + + // FIXME: This helps when the return is a real sret. If it is a + // automatically inserted sret (i.e. CanLowerReturn returns false), an + // extra copy is inserted in SelectionDAGBuilder which obscures this. + unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits; + Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits))); + } + // If this is an 8 or 16-bit value, it is really passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. @@ -3216,7 +3233,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); - case ISD::TRAP: case ISD::DEBUGTRAP: return lowerTRAP(Op, DAG); @@ -6997,3 +7013,21 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { TargetLoweringBase::finalizeLowering(MF); } + +void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts, + DAG, Depth); + + if (getSubtarget()->enableHugePrivateBuffer()) + return; + + // Technically it may be possible to have a dispatch with a single workitem + // that uses the full private memory size, but that's not really useful. We + // can't use vaddr in MUBUF instructions if we don't know the address + // calculation won't overflow, so assume the sign bit is never set. + Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits); +} |

