summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp9
1 files changed, 3 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index db1f2b3a3c3..5b7fc2656a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1165,8 +1165,8 @@ SDValue SITargetLowering::lowerKernargMemParameter(
// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
// the previous argument.
- if (Align < 4) {
- assert(MemVT.getStoreSize() < 4);
+ if (MemVT.getStoreSize() < 4 && Align < 4) {
+ // TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
int64_t AlignDownOffset = alignDown(Offset, 4);
int64_t OffsetDiff = Offset - AlignDownOffset;
@@ -1797,7 +1797,6 @@ SDValue SITargetLowering::LowerFormalArguments(
// FIXME: Alignment of explicit arguments totally broken with non-0 explicit
// kern arg offset.
const unsigned KernelArgBaseAlign = 16;
- const unsigned ExplicitOffset = Subtarget->getExplicitKernelArgOffset(Fn);
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
@@ -1813,11 +1812,9 @@ SDValue SITargetLowering::LowerFormalArguments(
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset();
+ const uint64_t Offset = VA.getLocMemOffset();
unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
- // The first 36 bytes of the input buffer contains information about
- // thread group and global sizes for clover.
SDValue Arg = lowerKernargMemParameter(
DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
OpenPOWER on IntegriCloud