summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/IPO/GlobalOpt.cpp
diff options
context:
space:
mode:
author Victor Hernandez <vhernandez@apple.com> 2009-11-05 00:03:03 +0000
committer Victor Hernandez <vhernandez@apple.com> 2009-11-05 00:03:03 +0000
commit 492ed30a329dd733555d320382eaa9854968ca6a (patch)
tree 5643b28c1142a5eccdc19f0f7f1fc8a1de9ab4f8 /llvm/lib/Transforms/IPO/GlobalOpt.cpp
parent d0ba0e6108f42059894c0527c3049292f6b0f479 (diff)
download bcm5719-llvm-492ed30a329dd733555d320382eaa9854968ca6a.tar.gz
bcm5719-llvm-492ed30a329dd733555d320382eaa9854968ca6a.zip
Update CreateMalloc so that its callers specify the size to allocate:
MallocInst-autoupgrade users use non-TargetData-computed allocation sizes. Optimization users use TargetData to compute the allocation size. Now that malloc calls can have constant sizes, update isArrayMallocHelper() to use TargetData to determine the size of the malloced type and the size of malloced arrays. Extend getMallocType() to support malloc calls that have non-bitcast uses. Update OptimizeGlobalAddressOfMalloc() to optimize malloc calls that have non-bitcast uses. The bitcast use of a malloc call has to be treated specially here because the uses of the bitcast need to be replaced and the bitcast needs to be erased (just like the malloc call) for OptimizeGlobalAddressOfMalloc() to work correctly. Update PerformHeapAllocSRoA() to optimize malloc calls that have non-bitcast uses. The bitcast use of the malloc is not handled specially here because ReplaceUsesOfMallocWithGlobal() replaces through the bitcast use. Update OptimizeOnceStoredGlobal() to not care about the malloc calls' bitcast use. Update all globalopt malloc tests to not rely on autoupgraded-MallocInsts, but instead use explicit malloc calls with correct allocation sizes. llvm-svn: 86077
Diffstat (limited to 'llvm/lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r-- llvm/lib/Transforms/IPO/GlobalOpt.cpp 142
1 files changed, 74 insertions, 68 deletions
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 5dab9efab22..234d0ecc2e5 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -822,32 +822,42 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
- BitCastInst *BCI,
+ const Type *AllocTy,
Value* NElems,
LLVMContext &Context,
TargetData* TD) {
- DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
- << " CALL = " << *CI << " BCI = " << *BCI << '\n');
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
const Type *IntPtrTy = TD->getIntPtrType(Context);
+ // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
+ // returned NULL and we would not be here).
+ BitCastInst *BCI = NULL;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
+ if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+ break;
+
ConstantInt *NElements = cast<ConstantInt>(NElems);
if (NElements->getZExtValue() != 1) {
// If we have an array allocation, transform it to a single element
// allocation to make the code below simpler.
- Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
- NElements->getZExtValue());
- Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
- Instruction* NewMI = cast<Instruction>(NewM);
+ Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
+ unsigned TypeSize = TD->getTypeAllocSize(NewTy);
+ if (const StructType *ST = dyn_cast<StructType>(NewTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
+ ConstantInt::get(IntPtrTy, TypeSize));
Value* Indices[2];
Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
- Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
- NewMI->getName()+".el0", CI);
- BCI->replaceAllUsesWith(NewGEP);
- BCI->eraseFromParent();
+ Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
+ NewCI->getName()+".el0", CI);
+ Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
+ if (BCI) BCI->replaceAllUsesWith(NewGEP);
+ CI->replaceAllUsesWith(Cast);
+ if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
- BCI = cast<BitCastInst>(NewMI);
- CI = extractMallocCallFromBitCast(NewMI);
+ BCI = dyn_cast<BitCastInst>(NewCI);
+ CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
}
// Create the new global variable. The contents of the malloc'd memory is
@@ -861,8 +871,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
GV,
GV->isThreadLocal());
- // Anything that used the malloc now uses the global directly.
- BCI->replaceAllUsesWith(NewGV);
+ // Anything that used the malloc or its bitcast now uses the global directly.
+ if (BCI) BCI->replaceAllUsesWith(NewGV);
+ CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
@@ -930,9 +941,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
GV->getParent()->getGlobalList().insert(GV, InitBool);
- // Now the GV is dead, nuke it and the malloc.
+ // Now the GV is dead, nuke it and the malloc (both CI and BCI).
GV->eraseFromParent();
- BCI->eraseFromParent();
+ if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
// To further other optimizations, loop over all users of NewGV and try to
@@ -1273,13 +1284,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
- CallInst *CI, BitCastInst* BCI,
- Value* NElems,
- LLVMContext &Context,
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value* NElems, LLVMContext &Context,
TargetData *TD) {
- DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
- << " BITCAST = " << *BCI << '\n');
+ DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1287,8 +1295,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
// it into GV). If there are other uses, change them to be uses of
// the global to simplify later code. This also deletes the store
// into GV.
- ReplaceUsesOfMallocWithGlobal(BCI, GV);
-
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
// Okay, at this point, there are no users of the malloc. Insert N
// new mallocs at the same place as CI, and N globals.
std::vector<Value*> FieldGlobals;
@@ -1306,11 +1314,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
- Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
- FieldTy, NElems,
- BCI->getName() + ".f" + Twine(FieldNo));
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (const StructType* ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ const Type* IntPtrTy = TD->getIntPtrType(Context);
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems,
+ CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
- new StoreInst(NMI, NGV, BCI);
+ new StoreInst(NMI, NGV, CI);
}
// The tricky aspect of this transformation is handling the case when malloc
@@ -1327,18 +1340,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
// }
Value *RunningOr = 0;
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
- Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
- Constant::getNullValue(FieldMallocs[i]->getType()),
- "isnull");
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
if (!RunningOr)
RunningOr = Cond; // First seteq
else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
- BasicBlock *OrigBB = BCI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
@@ -1374,9 +1387,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
}
BranchInst::Create(ContBB, NullPtrBlock);
-
- // CI and BCI are no longer needed, remove them.
- BCI->eraseFromParent();
+
+ // CI is no longer needed, remove it.
CI->eraseFromParent();
/// InsertedScalarizedLoads - As we process loads, if we can't immediately
@@ -1463,14 +1475,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
/// cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
- BitCastInst *BCI,
+ const Type *AllocTy,
Module::global_iterator &GVI,
TargetData *TD,
LLVMContext &Context) {
- // If we can't figure out the type being malloced, then we can't optimize.
- const Type *AllocTy = getMallocAllocatedType(CI);
- assert(AllocTy);
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1491,7 +1499,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// for.
{
SmallPtrSet<PHINode*, 8> PHIs;
- if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
return false;
}
@@ -1499,16 +1507,16 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// transform the program to use global memory instead of malloc'd memory.
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
- Value *NElems = getMallocArraySize(CI, Context, TD);
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (NElems) {
+ if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
+ Context, TD);
return true;
}
@@ -1526,26 +1534,29 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If the structure has an unreasonable number of fields, leave it
// alone.
if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
- AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (const ArrayType *AT =
dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
- AT->getNumElements());
- Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
- AllocSTy, NumElements,
- BCI->getName());
- Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
- BCI->replaceAllUsesWith(Cast);
- BCI->eraseFromParent();
+ const Type *IntPtrTy = TD->getIntPtrType(Context);
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
- BCI = cast<BitCastInst>(NewMI);
- CI = extractMallocCallFromBitCast(NewMI);
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc):
+ cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
+ Context, TD);
return true;
}
}
@@ -1577,15 +1588,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- if (getMallocAllocatedType(CI)) {
- BitCastInst* BCI = NULL;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; )
- BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
- if (BCI &&
- TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
- return true;
- }
+ const Type* MallocType = getMallocAllocatedType(CI);
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ GVI, TD, Context))
+ return true;
}
}
OpenPOWER on IntegriCloud