diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 |
1 file changed, 3 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index db1f2b3a3c3..5b7fc2656a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1165,8 +1165,8 @@ SDValue SITargetLowering::lowerKernargMemParameter(
   // Try to avoid using an extload by loading earlier than the argument address,
   // and extracting the relevant bits. The load should hopefully be merged with
   // the previous argument.
-  if (Align < 4) {
-    assert(MemVT.getStoreSize() < 4);
+  if (MemVT.getStoreSize() < 4 && Align < 4) {
+    // TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
     int64_t AlignDownOffset = alignDown(Offset, 4);
     int64_t OffsetDiff = Offset - AlignDownOffset;
 
@@ -1797,7 +1797,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   // FIXME: Alignment of explicit arguments totally broken with non-0 explicit
   // kern arg offset.
   const unsigned KernelArgBaseAlign = 16;
-  const unsigned ExplicitOffset = Subtarget->getExplicitKernelArgOffset(Fn);
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
     const ISD::InputArg &Arg = Ins[i];
@@ -1813,11 +1812,9 @@ SDValue SITargetLowering::LowerFormalArguments(
       VT = Ins[i].VT;
       EVT MemVT = VA.getLocVT();
 
-      const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset();
+      const uint64_t Offset = VA.getLocMemOffset();
       unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
 
-      // The first 36 bytes of the input buffer contains information about
-      // thread group and global sizes for clover.
       SDValue Arg = lowerKernargMemParameter(
         DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
       Chains.push_back(Arg.getValue(1));