Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp       11
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp     22
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp  44
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h    43
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ISelLowering.cpp        4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp          4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp             2
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp      2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp   3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h     1
10 files changed, 80 insertions, 56 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7a9b160f213..f679ea5b98c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -311,7 +311,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
+ OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
}
}
@@ -494,10 +494,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx.diagnose(Diag);
}
- if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
+ if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
- MFI->LDSSize, DS_Error);
+ MFI->getLDSSize(), DS_Error);
Ctx.diagnose(Diag);
}
@@ -531,7 +531,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
MFI->getMaximumWorkGroupSize(MF);
- ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+ ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@@ -707,7 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
- header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+ // FIXME: Should use getKernArgSize
+ header.kernarg_segment_byte_size = MFI->getABIArgOffset();
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
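
For reference, the ProgInfo.LDSBlocks line in the getSIProgramInfo hunk above rounds the in-use LDS byte count up to the hardware allocation granularity and converts it to a block count. Below is a minimal standalone sketch of that arithmetic, assuming a hypothetical 256-byte granularity (LDSAlignShift of 8) purely for illustration; the real shift is chosen per subtarget generation.

#include <cstdint>
#include <cstdio>

// Round Value up to the next multiple of Align (a power of two); this mirrors
// what llvm::alignTo computes for these operands.
static uint64_t alignToPow2(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  const unsigned LDSAlignShift = 8; // assumed 256-byte LDS allocation granularity
  const unsigned LDSSize = 4100;    // example: bytes of LDS in use (getLDSSize() + spill bytes)
  unsigned LDSBlocks =
      alignToPow2(LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
  std::printf("%u bytes of LDS -> %u blocks\n", LDSSize, LDSBlocks); // prints 17 blocks
  return 0;
}

Under that assumed granularity, 4100 bytes round up to 4352 bytes, i.e. 17 blocks of 256 bytes each.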
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8435551c3c5..d7c96db61a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -763,24 +763,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (hasDefinedInitializer(GV))
break;
- unsigned Offset;
- if (MFI->LocalMemoryObjects.count(GV) == 0) {
- unsigned Align = GV->getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(GV->getValueType());
-
- /// TODO: We should sort these to minimize wasted space due to alignment
- /// padding. Currently the padding is decided by the first encountered use
- /// during lowering.
- Offset = MFI->LDSSize = alignTo(MFI->LDSSize, Align);
- MFI->LocalMemoryObjects[GV] = Offset;
- MFI->LDSSize += DL.getTypeAllocSize(GV->getValueType());
- } else {
- Offset = MFI->LocalMemoryObjects[GV];
- }
-
- return DAG.getConstant(Offset, SDLoc(Op),
- getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
@@ -2653,7 +2637,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
- uint64_t ArgOffset = MFI->ABIArgOffset;
+ uint64_t ArgOffset = MFI->getABIArgOffset();
switch (Param) {
case GRID_DIM:
return ArgOffset;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 44516dab04f..40c3327a98d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,23 +1,47 @@
+//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "AMDGPUMachineFunction.h"
+#include "AMDGPUSubtarget.h"
using namespace llvm;
-// Pin the vtable to this file.
-void AMDGPUMachineFunction::anchor() {}
-
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
+ LocalMemoryObjects(),
KernArgSize(0),
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- ScratchSize(0),
- IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
- MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
-{
+ IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
+ // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
+ // except reserved size is not correctly aligned.
}
-bool AMDGPUMachineFunction::isKernel() const
-{
- return IsKernel;
+unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
+ const GlobalValue &GV) {
+ auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
+ if (!Entry.second)
+ return Entry.first->second;
+
+ unsigned Align = GV.getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(GV.getValueType());
+
+ /// TODO: We should sort these to minimize wasted space due to alignment
+ /// padding. Currently the padding is decided by the first encountered use
+ /// during lowering.
+ unsigned Offset = LDSSize = alignTo(LDSSize, Align);
+
+ Entry.first->second = Offset;
+ LDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+ return Offset;
}
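
The new AMDGPUMachineFunction::allocateLDSGlobal above is a simple bump allocator keyed by the global: the first query for a global aligns the running LDS size up to the global's alignment, records that offset, and grows the size by the global's allocation size; later queries return the cached offset. Below is a self-contained sketch of the same scheme outside of LLVM, with hypothetical names: std::unordered_map stands in for the SmallDenseMap and a plain struct stands in for GlobalValue/DataLayout.

#include <cstdio>
#include <string>
#include <unordered_map>

// Illustrative stand-in for a global variable: just a name, size, and alignment.
struct FakeGlobal {
  std::string Name;
  unsigned Size;   // bytes, like DL.getTypeAllocSize(GV.getValueType())
  unsigned Align;  // bytes, like GV.getAlignment() or the ABI type alignment
};

class LDSAllocator {
  std::unordered_map<std::string, unsigned> Offsets; // analogue of LocalMemoryObjects
  unsigned LDSSize = 0;                              // running byte count in use

  static unsigned alignTo(unsigned Value, unsigned Align) {
    return (Value + Align - 1) / Align * Align;
  }

public:
  // First call for a global reserves space at the aligned end of LDS;
  // subsequent calls return the cached offset.
  unsigned allocate(const FakeGlobal &GV) {
    auto It = Offsets.find(GV.Name);
    if (It != Offsets.end())
      return It->second;

    unsigned Offset = LDSSize = alignTo(LDSSize, GV.Align);
    Offsets[GV.Name] = Offset;
    LDSSize += GV.Size;
    return Offset;
  }

  unsigned size() const { return LDSSize; }
};

int main() {
  LDSAllocator LDS;
  std::printf("a at %u\n", LDS.allocate({"a", 4, 4}));       // 0
  std::printf("b at %u\n", LDS.allocate({"b", 16, 16}));     // 16 (4 bytes padded up to 16)
  std::printf("a again at %u\n", LDS.allocate({"a", 4, 4})); // still 0, cached
  std::printf("total %u bytes of LDS\n", LDS.size());        // 32
  return 0;
}

As the TODO carried over in the diff notes, allocating in first-use order can waste space to alignment padding; sorting the globals by alignment before assigning offsets would pack them tighter.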
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 6b31f63e1a9..0c8b79d496d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,15 +11,26 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
+ /// A map to keep track of local memory objects and their offsets within the
+ /// local memory space.
+ SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
+
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
- virtual void anchor();
+ /// Number of bytes in the LDS that are being used.
+ unsigned LDSSize;
+
+ // FIXME: This should probably be removed.
+ /// Start of implicit kernel args
+ unsigned ABIArgOffset;
+
+ bool IsKernel;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -35,19 +46,27 @@ public:
return Result;
}
- /// A map to keep track of local memory objects and their offsets within
- /// the local memory space.
- std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
- /// Number of bytes in the LDS that are being used.
- unsigned LDSSize;
+ uint64_t getKernArgSize() const {
+ return KernArgSize;
+ }
- /// Start of implicit kernel args
- unsigned ABIArgOffset;
+ void setABIArgOffset(unsigned NewOffset) {
+ ABIArgOffset = NewOffset;
+ }
+
+ unsigned getABIArgOffset() const {
+ return ABIArgOffset;
+ }
- bool isKernel() const;
+ unsigned getLDSSize() const {
+ return LDSSize;
+ }
- unsigned ScratchSize;
- bool IsKernel;
+ bool isKernel() const {
+ return IsKernel;
+ }
+
+ unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
};
}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 62c02d0e990..6dd47214f95 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1756,7 +1756,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = 36 + VA.getLocMemOffset();
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
@@ -1767,7 +1767,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
- MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
+ MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
}
return Chain;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6746c8a97f1..7e6d212f730 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -770,7 +770,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}
InVals.push_back(Arg);
- Info->ABIArgOffset = Offset + MemVT.getStoreSize();
+ Info->setABIArgOffset(Offset + MemVT.getStoreSize());
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -1435,7 +1435,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
- .addImm(MFI->LDSSize);
+ .addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2772ad78f60..8fc52a98d9b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -807,7 +807,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
}
// Add FrameIndex to LDS offset
- unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+ unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
.addImm(LDSOffset)
.addReg(TIDReg);
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 26ac5031803..1071cf88380 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -516,7 +516,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (NeedFlat && MFI->IsKernel) {
+ if (NeedFlat && MFI->isKernel()) {
// TODO: What to use with function calls?
// We will need to Initialize the flat scratch register pair.
if (NeedFlat)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 450fa5db574..2b3c458218d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -26,9 +26,6 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));
-// Pin the vtable to this file.
-void SIMachineFunctionInfo::anchor() {}
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2eec9bf518c..7d0eb59a29a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -28,7 +28,6 @@ class MachineRegisterInfo;
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// FIXME: This should be removed and getPreloadedValue moved here.
friend struct SIRegisterInfo;
- void anchor() override;
unsigned TIDReg;