summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-12-07 22:12:17 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-12-07 22:12:17 +0000
commit: b5613ecf173d5eeca82dce3be2b269feb4a75082 (patch)
tree: 70a8e65b9ca1fe8a54cdf817eb39e29b0631d7e5 /llvm/lib
parent: cc4b6920b36f81ff3a4370c6646de07e268e838b (diff)
download: bcm5719-llvm-b5613ecf173d5eeca82dce3be2b269feb4a75082.tar.gz
bcm5719-llvm-b5613ecf173d5eeca82dce3be2b269feb4a75082.zip
AMDGPU: Fix offsets for < 4-byte aggregate kernel arguments
We were still using the rounded-down offset and alignment for these arguments, even though aggregates aren't handled by the shift optimization, because you can't trivially bitcast the loaded value.

llvm-svn: 348658
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 11
1 file changed, 7 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index fae1da92337..743dc7a0d00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -122,14 +122,17 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
VectorType *VT = dyn_cast<VectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
+ bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
+
VectorType *V4Ty = nullptr;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset);
+ unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
+ KernArgBaseAlign);
Value *ArgPtr;
- if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types
+ if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
// bits.
@@ -147,7 +150,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
- AlignDownOffset,
+ EltOffset,
Arg.getName() + ".kernarg.offset");
ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
@@ -198,7 +201,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
- if (Size < 32 && !ArgTy->isAggregateType()) {
+ if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
OpenPOWER on IntegriCloud