summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp12
1 files changed, 7 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 321323fb21c..007321d4704 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -496,10 +496,12 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
- // FIXME: This is the maximum work group size. We should try to get
- // value from the reqd_work_group_size function attribute if it is
- // available.
- unsigned WorkGroupSize = 256;
+ const Function &ContainingFunction = *I.getParent()->getParent();
+
+ // FIXME: We should also try to get this value from the reqd_work_group_size
+ // function attribute if it is available.
+ unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
+
int AllocaSize =
WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
@@ -520,7 +522,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Function *F = I.getParent()->getParent();
- Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
GlobalVariable *GV = new GlobalVariable(
*Mod, GVTy, false, GlobalValue::InternalLinkage,
UndefValue::get(GVTy),
OpenPOWER on IntegriCloud