diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-12-07 22:12:17 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-12-07 22:12:17 +0000 |
commit | b5613ecf173d5eeca82dce3be2b269feb4a75082 (patch) | |
tree | 70a8e65b9ca1fe8a54cdf817eb39e29b0631d7e5 /llvm/lib | |
parent | cc4b6920b36f81ff3a4370c6646de07e268e838b (diff) | |
download | bcm5719-llvm-b5613ecf173d5eeca82dce3be2b269feb4a75082.tar.gz bcm5719-llvm-b5613ecf173d5eeca82dce3be2b269feb4a75082.zip |
AMDGPU: Fix offsets for < 4-byte aggregate kernel arguments
We were still using the rounded-down offset and alignment for aggregate
arguments smaller than 4 bytes, even though the shift-based sub-dword load
optimization does not handle aggregates, because you can't trivially
bitcast the loaded value.
llvm-svn: 348658
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index fae1da92337..743dc7a0d00 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -122,14 +122,17 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { VectorType *VT = dyn_cast<VectorType>(ArgTy); bool IsV3 = VT && VT->getNumElements() == 3; + bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType(); + VectorType *V4Ty = nullptr; int64_t AlignDownOffset = alignDown(EltOffset, 4); int64_t OffsetDiff = EltOffset - AlignDownOffset; - unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset); + unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset, + KernArgBaseAlign); Value *ArgPtr; - if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types + if (DoShiftOpt) { // FIXME: Handle aggregate types // Since we don't have sub-dword scalar loads, avoid doing an extload by // loading earlier than the argument address, and extracting the relevant // bits. @@ -147,7 +150,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { } else { ArgPtr = Builder.CreateConstInBoundsGEP1_64( KernArgSegment, - AlignDownOffset, + EltOffset, Arg.getName() + ".kernarg.offset"); ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS), ArgPtr->getName() + ".cast"); @@ -198,7 +201,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { // TODO: Convert noalias arg to !noalias - if (Size < 32 && !ArgTy->isAggregateType()) { + if (DoShiftOpt) { Value *ExtractBits = OffsetDiff == 0 ? Load : Builder.CreateLShr(Load, OffsetDiff * 8); |