diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 8 |
4 files changed, 15 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 38804723c92..75982075325 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -806,7 +806,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) { Type *I32Ty = Builder.getInt32Ty(); Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace()); Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT); - LoadInst *WidenLoad = Builder.CreateLoad(BitCast); + LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, BitCast); WidenLoad->copyMetadata(I); // If we have range metadata, we need to convert the type, and not make diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index f259f8311ec..581e229b4a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1357,12 +1357,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, if (!isSin) { // CI->cos, UI->sin B.SetInsertPoint(&*ItOld); UI->replaceAllUsesWith(&*Call); - Instruction *Reload = B.CreateLoad(Alloc); + Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); CI->replaceAllUsesWith(Reload); UI->eraseFromParent(); CI->eraseFromParent(); } else { // CI->sin, UI->cos - Instruction *Reload = B.CreateLoad(Alloc); + Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); UI->replaceAllUsesWith(Reload); CI->replaceAllUsesWith(Call); UI->eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index aaad8b6e48a..b2637418ebb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -132,6 +132,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { KernArgBaseAlign); Value *ArgPtr; + Type *AdjustedArgTy; if (DoShiftOpt) { // FIXME: Handle aggregate types // Since we don't have sub-dword scalar loads, avoid doing an extload by // loading earlier than the argument address, and extracting the relevant @@ -144,25 +145,25 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { KernArgSegment, AlignDownOffset, Arg.getName() + ".kernarg.offset.align.down"); - ArgPtr = Builder.CreateBitCast(ArgPtr, - Builder.getInt32Ty()->getPointerTo(AS), - ArgPtr->getName() + ".cast"); + AdjustedArgTy = Builder.getInt32Ty(); } else { ArgPtr = Builder.CreateConstInBoundsGEP1_64( KernArgSegment, EltOffset, Arg.getName() + ".kernarg.offset"); - ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS), - ArgPtr->getName() + ".cast"); + AdjustedArgTy = ArgTy; } if (IsV3 && Size >= 32) { V4Ty = VectorType::get(VT->getVectorElementType(), 4); // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads - ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(AS)); + AdjustedArgTy = V4Ty; } - LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign); + ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS), + ArgPtr->getName() + ".cast"); + LoadInst *Load = + Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign); Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {})); MDBuilder MDB(Ctx); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 8bf2b13c654..5f05ce7d2a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -245,10 +245,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { // 32-bit and extract sequence is already present, and it is probably easier // to CSE this. The loads should be mergable later anyway. Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1); - LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4); + LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4); Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2); - LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4); + LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4); MDNode *MD = MDNode::get(Mod->getContext(), None); LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD); @@ -426,7 +426,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); - Value *VecValue = Builder.CreateLoad(BitCast); + Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); Inst->replaceAllUsesWith(ExtractElement); Inst->eraseFromParent(); @@ -441,7 +441,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); - Value *VecValue = Builder.CreateLoad(BitCast); + Value *VecValue = Builder.CreateLoad(VectorTy, BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, SI->getValueOperand(), Index); |