summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: James Y Knight <jyknight@google.com> 2019-02-01 20:44:24 +0000
committer: James Y Knight <jyknight@google.com> 2019-02-01 20:44:24 +0000
commit: 14359ef1b6a0610ac91df5f5a91c88a0b51c187c (patch)
tree: 53b7628ce6ecba998379d0d19f875bc9dad3b69a /llvm/lib/Target/AMDGPU
parent: d9e85a0861b7e9320c34547a2ad7f49c504a9381 (diff)
download: bcm5719-llvm-14359ef1b6a0610ac91df5f5a91c88a0b51c187c.tar.gz
download: bcm5719-llvm-14359ef1b6a0610ac91df5f5a91c88a0b51c187c.zip
[opaque pointer types] Pass value type to LoadInst creation.
This cleans up all LoadInst creation in LLVM to explicitly pass the value type rather than deriving it from the pointer's element-type.

Differential Revision: https://reviews.llvm.org/D57172

llvm-svn: 352911
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp 15
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp 8
4 files changed, 15 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 38804723c92..75982075325 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -806,7 +806,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
Type *I32Ty = Builder.getInt32Ty();
Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
- LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
+ LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, BitCast);
WidenLoad->copyMetadata(I);
// If we have range metadata, we need to convert the type, and not make
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index f259f8311ec..581e229b4a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1357,12 +1357,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
if (!isSin) { // CI->cos, UI->sin
B.SetInsertPoint(&*ItOld);
UI->replaceAllUsesWith(&*Call);
- Instruction *Reload = B.CreateLoad(Alloc);
+ Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
CI->replaceAllUsesWith(Reload);
UI->eraseFromParent();
CI->eraseFromParent();
} else { // CI->sin, UI->cos
- Instruction *Reload = B.CreateLoad(Alloc);
+ Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
UI->replaceAllUsesWith(Reload);
CI->replaceAllUsesWith(Call);
UI->eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index aaad8b6e48a..b2637418ebb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -132,6 +132,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
KernArgBaseAlign);
Value *ArgPtr;
+ Type *AdjustedArgTy;
if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
@@ -144,25 +145,25 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
KernArgSegment,
AlignDownOffset,
Arg.getName() + ".kernarg.offset.align.down");
- ArgPtr = Builder.CreateBitCast(ArgPtr,
- Builder.getInt32Ty()->getPointerTo(AS),
- ArgPtr->getName() + ".cast");
+ AdjustedArgTy = Builder.getInt32Ty();
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
EltOffset,
Arg.getName() + ".kernarg.offset");
- ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
- ArgPtr->getName() + ".cast");
+ AdjustedArgTy = ArgTy;
}
if (IsV3 && Size >= 32) {
V4Ty = VectorType::get(VT->getVectorElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
- ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(AS));
+ AdjustedArgTy = V4Ty;
}
- LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
+ ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
+ ArgPtr->getName() + ".cast");
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 8bf2b13c654..5f05ce7d2a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -245,10 +245,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
// 32-bit and extract sequence is already present, and it is probably easier
// to CSE this. The loads should be mergable later anyway.
Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
- LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);
+ LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
- LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
+ LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
@@ -426,7 +426,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
Inst->replaceAllUsesWith(ExtractElement);
Inst->eraseFromParent();
@@ -441,7 +441,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
SI->getValueOperand(),
Index);
OpenPOWER on IntegriCloud