summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-07-13 16:40:25 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-07-13 16:40:25 +0000
commitde950777804d0fb9ce42190e1fe28e28ec033c2b (patch)
tree4a122ffa5261339ce2e6fa471542c9a38fb76996 /llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
parent218b6a2a2ae682f0e2b42a9f76619a382b723430 (diff)
downloadbcm5719-llvm-de950777804d0fb9ce42190e1fe28e28ec033c2b.tar.gz
bcm5719-llvm-de950777804d0fb9ce42190e1fe28e28ec033c2b.zip
AMDGPU: Fix handling of alignment padding in DAG argument lowering
This was completely broken if there was ever a struct argument, as this information is thrown away during the argument analysis. The offsets as passed in to LowerFormalArguments are not useful, as they partially depend on the legalized result register type, and they don't consider the alignment in the first place. Ignore the Ins array, and instead figure out from the raw IR type what we need to do. This seems to fix the padding computation if the DAG lowering is forced (and stops breaking arguments following padded arguments if the arguments were only partially lowered in the IR) llvm-svn: 337021
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h28
1 files changed, 14 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index d9806d6133c..62310973365 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -51,7 +51,7 @@ public:
enum Generation {
R600 = 0,
R700 = 1,
- EVERGREEN = 2,
+ EVERGREEN = 2,
NORTHERN_ISLANDS = 3,
SOUTHERN_ISLANDS = 4,
SEA_ISLANDS = 5,
@@ -82,7 +82,7 @@ public:
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
- const Function &F);
+ const Function &F);
/// \returns Default range flat work group size for a calling convention.
std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
@@ -231,6 +231,18 @@ public:
/// Creates value range metadata on an workitemid.* inrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
+ /// \returns Number of bytes of arguments that are passed to a shader or
+ /// kernel in addition to the explicit ones declared for the function.
+ unsigned getImplicitArgNumBytes(const Function &F) const {
+ if (isMesaKernel(F))
+ return 16;
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
+ }
+ uint64_t getExplicitKernArgSize(const Function &F,
+ unsigned &MaxAlign) const;
+ unsigned getKernArgSegmentSize(const Function &F,
+ unsigned &MaxAlign) const;
+
virtual ~AMDGPUSubtarget() {}
};
@@ -669,14 +681,6 @@ public:
return D16PreservesUnusedBits;
}
- /// \returns Number of bytes of arguments that are passed to a shader or
- /// kernel in addition to the explicit ones declared for the function.
- unsigned getImplicitArgNumBytes(const Function &F) const {
- if (isMesaKernel(F))
- return 16;
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
- }
-
// Scratch is allocated in 256 dword per wave blocks for the entire
// wavefront. When viewed from the perspecive of an arbitrary workitem, this
// is 4-byte aligned.
@@ -825,10 +829,6 @@ public:
return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
- uint64_t getExplicitKernArgSize(const Function &F) const;
- unsigned getKernArgSegmentSize(const Function &F,
- int64_t ExplicitArgBytes = -1) const;
-
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
OpenPOWER on IntegriCloud