diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 321323fb21c..007321d4704 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -496,10 +496,12 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); - // FIXME: This is the maximum work group size. We should try to get - // value from the reqd_work_group_size function attribute if it is - // available. - unsigned WorkGroupSize = 256; + const Function &ContainingFunction = *I.getParent()->getParent(); + + // FIXME: We should also try to get this value from the reqd_work_group_size + // function attribute if it is available. + unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction); + int AllocaSize = WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy); @@ -520,7 +522,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { Function *F = I.getParent()->getParent(); - Type *GVTy = ArrayType::get(I.getAllocatedType(), 256); + Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize); GlobalVariable *GV = new GlobalVariable( *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy), |