diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index ec7ea2baec0..5d087c09918 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -328,6 +328,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
     // Currently only handle the case where the Pointer Operand is a GEP.
     // Also we could not vectorize volatile or atomic loads.
     LoadInst *LI = cast<LoadInst>(Inst);
+    if (isa<AllocaInst>(User) &&
+        LI->getPointerOperandType() == User->getType() &&
+        isa<VectorType>(LI->getType()))
+      return true;
     return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
   }
   case Instruction::BitCast:
@@ -337,6 +341,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
     // since it should be canonical form, the User should be a GEP.
     // Also we could not vectorize volatile or atomic stores.
     StoreInst *SI = cast<StoreInst>(Inst);
+    if (isa<AllocaInst>(User) &&
+        SI->getPointerOperandType() == User->getType() &&
+        isa<VectorType>(SI->getValueOperand()->getType()))
+      return true;
     return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
   }
   default:
@@ -351,7 +359,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
     return false;
   }
 
-  ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
+  Type *AT = Alloca->getAllocatedType();
+  SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
 
   LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
 
@@ -398,7 +407,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
     }
   }
 
-  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+  VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
+  if (!VectorTy)
+    VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
 
   LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
                     << *VectorTy << '\n');
@@ -408,6 +419,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
     IRBuilder<> Builder(Inst);
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
+      if (Inst->getType() == AT)
+        break;
+
       Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -420,9 +434,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
       break;
     }
     case Instruction::Store: {
-      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
-
       StoreInst *SI = cast<StoreInst>(Inst);
+      if (SI->getValueOperand()->getType() == AT)
+        break;
+
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);