Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp      | 11
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp    | 22
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 44
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h   | 43
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp      |  4
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp        |  4
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp           |  2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp    |  2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp |  3
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h   |  1
10 files changed, 80 insertions, 56 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7a9b160f213..f679ea5b98c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -311,7 +311,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
 
   if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
     OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
-    OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
+    OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
   }
 }
 
@@ -494,10 +494,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
     Ctx.diagnose(Diag);
   }
 
-  if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
+  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
     LLVMContext &Ctx = MF.getFunction()->getContext();
     DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
-                                     MFI->LDSSize, DS_Error);
+                                     MFI->getLDSSize(), DS_Error);
     Ctx.diagnose(Diag);
   }
 
@@ -531,7 +531,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
                           MFI->getMaximumWorkGroupSize(MF);
 
-  ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
 
@@ -707,7 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
   if (STM.isXNACKEnabled())
     header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
-  header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+  // FIXME: Should use getKernArgSize
+  header.kernarg_segment_byte_size = MFI->getABIArgOffset();
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8435551c3c5..d7c96db61a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -763,24 +763,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
     if (hasDefinedInitializer(GV))
       break;
 
-    unsigned Offset;
-    if (MFI->LocalMemoryObjects.count(GV) == 0) {
-      unsigned Align = GV->getAlignment();
-      if (Align == 0)
-        Align = DL.getABITypeAlignment(GV->getValueType());
-
-      /// TODO: We should sort these to minimize wasted space due to alignment
-      /// padding. Currently the padding is decided by the first encountered use
-      /// during lowering.
-      Offset = MFI->LDSSize = alignTo(MFI->LDSSize, Align);
-      MFI->LocalMemoryObjects[GV] = Offset;
-      MFI->LDSSize += DL.getTypeAllocSize(GV->getValueType());
-    } else {
-      Offset = MFI->LocalMemoryObjects[GV];
-    }
-
-    return DAG.getConstant(Offset, SDLoc(Op),
-                           getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
+    unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+    return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
   }
 }
 
@@ -2653,7 +2637,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
 
 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
     const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
-  uint64_t ArgOffset = MFI->ABIArgOffset;
+  uint64_t ArgOffset = MFI->getABIArgOffset();
   switch (Param) {
   case GRID_DIM:
     return ArgOffset;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 44516dab04f..40c3327a98d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,23 +1,47 @@
+//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
 #include "AMDGPUMachineFunction.h"
+#include "AMDGPUSubtarget.h"
 
 using namespace llvm;
 
-// Pin the vtable to this file.
-void AMDGPUMachineFunction::anchor() {}
-
 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   MachineFunctionInfo(),
+  LocalMemoryObjects(),
   KernArgSize(0),
   MaxKernArgAlign(0),
   LDSSize(0),
   ABIArgOffset(0),
-  ScratchSize(0),
-  IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
-           MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
-{
+  IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+           MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
+  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
+  // except reserved size is not correctly aligned.
 }
 
-bool AMDGPUMachineFunction::isKernel() const
-{
-  return IsKernel;
+unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
+                                                  const GlobalValue &GV) {
+  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
+  if (!Entry.second)
+    return Entry.first->second;
+
+  unsigned Align = GV.getAlignment();
+  if (Align == 0)
+    Align = DL.getABITypeAlignment(GV.getValueType());
+
+  /// TODO: We should sort these to minimize wasted space due to alignment
+  /// padding. Currently the padding is decided by the first encountered use
+  /// during lowering.
+  unsigned Offset = LDSSize = alignTo(LDSSize, Align);
+
+  Entry.first->second = Offset;
+  LDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+  return Offset;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 6b31f63e1a9..0c8b79d496d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,15 +11,26 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 
 #include "llvm/CodeGen/MachineFunction.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
 
 namespace llvm {
 
 class AMDGPUMachineFunction : public MachineFunctionInfo {
+  /// A map to keep track of local memory objects and their offsets within the
+  /// local memory space.
+  SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
+
  uint64_t KernArgSize;
  unsigned MaxKernArgAlign;
 
-  virtual void anchor();
+  /// Number of bytes in the LDS that are being used.
+  unsigned LDSSize;
+
+  // FIXME: This should probably be removed.
+  /// Start of implicit kernel args
+  unsigned ABIArgOffset;
+
+  bool IsKernel;
 
 public:
   AMDGPUMachineFunction(const MachineFunction &MF);
@@ -35,19 +46,27 @@ public:
     return Result;
   }
 
-  /// A map to keep track of local memory objects and their offsets within
-  /// the local memory space.
-  std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
-  /// Number of bytes in the LDS that are being used.
-  unsigned LDSSize;
+  uint64_t getKernArgSize() const {
+    return KernArgSize;
+  }
 
-  /// Start of implicit kernel args
-  unsigned ABIArgOffset;
+  void setABIArgOffset(unsigned NewOffset) {
+    ABIArgOffset = NewOffset;
+  }
+
+  unsigned getABIArgOffset() const {
+    return ABIArgOffset;
+  }
 
-  bool isKernel() const;
+  unsigned getLDSSize() const {
+    return LDSSize;
+  }
 
-  unsigned ScratchSize;
-  bool IsKernel;
+  bool isKernel() const {
+    return IsKernel;
+  }
+
+  unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
 };
 
 }
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 62c02d0e990..6dd47214f95 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1756,7 +1756,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
 
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
-    unsigned Offset = 36 + VA.getLocMemOffset();
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
 
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(
@@ -1767,7 +1767,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
     // 4 is the preferred alignment for the CONSTANT memory space.
     InVals.push_back(Arg);
-    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
+    MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
   }
   return Chain;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6746c8a97f1..7e6d212f730 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -770,7 +770,7 @@ SDValue SITargetLowering::LowerFormalArguments(
       }
 
       InVals.push_back(Arg);
-      Info->ABIArgOffset = Offset + MemVT.getStoreSize();
+      Info->setABIArgOffset(Offset + MemVT.getStoreSize());
       continue;
     }
     assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -1435,7 +1435,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
         .addOperand(MI.getOperand(0))
-        .addImm(MFI->LDSSize);
+        .addImm(MFI->getLDSSize());
     MI.eraseFromParent();
     return BB;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2772ad78f60..8fc52a98d9b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -807,7 +807,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
   }
 
   // Add FrameIndex to LDS offset
-  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
   BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
     .addImm(LDSOffset)
     .addReg(TIDReg);
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 26ac5031803..1071cf88380 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -516,7 +516,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  if (NeedFlat && MFI->IsKernel) {
+  if (NeedFlat && MFI->isKernel()) {
     // TODO: What to use with function calls?
     // We will need to Initialize the flat scratch register pair.
     if (NeedFlat)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 450fa5db574..2b3c458218d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -26,9 +26,6 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
   cl::ReallyHidden,
   cl::init(true));
 
-// Pin the vtable to this file.
-void SIMachineFunctionInfo::anchor() {}
-
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2eec9bf518c..7d0eb59a29a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -28,7 +28,6 @@ class MachineRegisterInfo;
 
 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // FIXME: This should be removed and getPreloadedValue moved here.
   friend struct SIRegisterInfo;
-  void anchor() override;
 
   unsigned TIDReg;
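
The functional core of this commit is the new AMDGPUMachineFunction::allocateLDSGlobal: the first lowering of a local-memory global fixes its offset at the current LDSSize rounded up to the global's alignment, later uses return the cached offset, and LDSSize then grows by the object's allocation size. The standalone sketch below is not part of the commit; it models the same first-use allocation scheme with plain standard-library types, where FakeGlobal and the free function are hypothetical stand-ins for llvm::GlobalValue and the member function, and alignTo is open-coded.

#include <cstdio>
#include <map>

// Stand-in for llvm::GlobalValue: only the two properties the allocator reads.
struct FakeGlobal {
  unsigned Size;   // models DL.getTypeAllocSize(GV.getValueType())
  unsigned Align;  // models GV.getAlignment() after the ABI-alignment fallback
};

static unsigned LDSSize = 0;
static std::map<const FakeGlobal *, unsigned> LocalMemoryObjects;

// Mirrors the committed allocateLDSGlobal: the first use places the global,
// subsequent uses return the cached offset.
static unsigned allocateLDSGlobal(const FakeGlobal &GV) {
  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0u));
  if (!Entry.second)
    return Entry.first->second;  // already placed

  // Round the running LDS size up to this global's alignment (open-coded alignTo)...
  unsigned Offset = LDSSize = (LDSSize + GV.Align - 1) / GV.Align * GV.Align;
  Entry.first->second = Offset;
  LDSSize += GV.Size;            // ...then reserve its storage.
  return Offset;
}

int main() {
  FakeGlobal A{4, 4}, B{16, 16};
  unsigned OffA = allocateLDSGlobal(A);  // placed first: offset 0, LDSSize 4
  unsigned OffB = allocateLDSGlobal(B);  // aligned up to 16: 12 bytes of padding
  std::printf("A at %u, B at %u, LDSSize %u\n", OffA, OffB, LDSSize);
  return 0;
}

Encounter order decides the padding here (placing B before A would waste no bytes), which is exactly the waste the TODO in the new function about sorting by alignment refers to.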