diff options
| author | Chris Lattner <sabre@nondot.org> | 2006-09-30 23:32:09 +0000 | 
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2006-09-30 23:32:09 +0000 | 
| commit | 24d3d4280a8ee823f769b9c7d2a980b9633b97a2 (patch) | |
| tree | bf6f7de4b7f81bf07bc19813a74442622baea888 /llvm/lib | |
| parent | 456a806692823808f1668994ace79ef4a72043ef (diff) | |
| download | bcm5719-llvm-24d3d4280a8ee823f769b9c7d2a980b9633b97a2.tar.gz bcm5719-llvm-24d3d4280a8ee823f769b9c7d2a980b9633b97a2.zip | |
Implement SRA of heap allocations.
llvm-svn: 30679
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/IPO/GlobalOpt.cpp | 276 | 
1 files changed, 266 insertions, 10 deletions
| diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 27862122601..f705d2d50c5 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -36,6 +36,7 @@ namespace {    Statistic<> NumMarked   ("globalopt", "Number of globals marked constant");    Statistic<> NumSRA      ("globalopt", "Number of aggregate globals broken "                             "into scalars"); +  Statistic<> NumHeapSRA  ("globalopt", "Number of heap objects SRA'd");    Statistic<> NumSubstitute("globalopt",                          "Number of globals with initializers stored into them");    Statistic<> NumDeleted  ("globalopt", "Number of globals deleted"); @@ -794,9 +795,235 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,        return false;      }    return true; +} + +/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV +/// somewhere.  Transform all uses of the allocation into loads from the +/// global and uses of the resultant pointer.  Further, delete the store into +/// GV.  This assumes that these value pass the  +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,  +                                          GlobalVariable *GV) { +  while (!Alloc->use_empty()) { +    Instruction *U = Alloc->use_back(); +    if (StoreInst *SI = dyn_cast<StoreInst>(U)) { +      // If this is the store of the allocation into the global, remove it. +      if (SI->getOperand(1) == GV) { +        SI->eraseFromParent(); +        continue; +      } +    } +     +    // Insert a load from the global, and use it instead of the malloc. +    Value *NL = new LoadInst(GV, GV->getName()+".val", U); +    U->replaceUsesOfWith(Alloc, NL); +  } +} + +/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from +/// GV are simple enough to perform HeapSRA, return true. +static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) { +  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;  +       ++UI) +    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { +      // We permit two users of the load: setcc comparing against the null +      // pointer, and a getelementptr of a specific form. +      for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E;  +           ++UI) { +        // Comparison against null is ok. +        if (SetCondInst *SCI = dyn_cast<SetCondInst>(*UI)) { +          if (!isa<ConstantPointerNull>(SCI->getOperand(1))) +            return false; +          continue; +        } +         +        // getelementptr is also ok, but only a simple form. +        GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI); +        if (!GEPI) return false; +         +        // Must index into the array and into the struct. +        if (GEPI->getNumOperands() < 3) +          return false; +         +        // Otherwise the GEP is ok. +        continue; +      } +    } +  return true; +} + +/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr +/// is a value loaded from the global.  Eliminate all uses of Ptr, making them +/// use FieldGlobals instead.  All uses of loaded values satisfy +/// GlobalLoadUsesSimpleEnoughForHeapSRA. +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Ptr,  +                             const std::vector<GlobalVariable*> &FieldGlobals) { +  std::vector<Value *> InsertedLoadsForPtr; +  //InsertedLoadsForPtr.resize(FieldGlobals.size()); +  while (!Ptr->use_empty()) { +    Instruction *User = Ptr->use_back(); +     +    // If this is a comparison against null, handle it. +    if (SetCondInst *SCI = dyn_cast<SetCondInst>(User)) { +      assert(isa<ConstantPointerNull>(SCI->getOperand(1))); +      // If we have a setcc of the loaded pointer, we can use a setcc of any +      // field. +      Value *NPtr; +      if (InsertedLoadsForPtr.empty()) { +        NPtr = new LoadInst(FieldGlobals[0], Ptr->getName()+".f0", Ptr); +        InsertedLoadsForPtr.push_back(Ptr); +      } else { +        NPtr = InsertedLoadsForPtr.back(); +      } +       +      Value *New = new SetCondInst(SCI->getOpcode(), NPtr, +                                   Constant::getNullValue(NPtr->getType()), +                                   SCI->getName(), SCI); +      SCI->replaceAllUsesWith(New); +      SCI->eraseFromParent(); +      continue; +    } +     +    // Otherwise, this should be: 'getelementptr Ptr, Idx, uint FieldNo ...' +    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); +    assert(GEPI->getNumOperands() >= 3 && isa<ConstantUInt>(GEPI->getOperand(2)) +           && "Unexpected GEPI!"); +     +    // Load the pointer for this field. +    unsigned FieldNo = cast<ConstantUInt>(GEPI->getOperand(2))->getValue(); +    if (InsertedLoadsForPtr.size() <= FieldNo) +      InsertedLoadsForPtr.resize(FieldNo+1); +    if (InsertedLoadsForPtr[FieldNo] == 0) +      InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo], +                                                  Ptr->getName()+".f" +  +                                                  utostr(FieldNo), Ptr); +    Value *NewPtr = InsertedLoadsForPtr[FieldNo]; + +    // Create the new GEP idx vector. +    std::vector<Value*> GEPIdx; +    GEPIdx.push_back(GEPI->getOperand(1)); +    GEPIdx.insert(GEPIdx.end(), GEPI->op_begin()+3, GEPI->op_end()); + +    Value *NGEPI = new GetElementPtrInst(NewPtr, GEPIdx, GEPI->getName(), GEPI); +    GEPI->replaceAllUsesWith(NGEPI); +    GEPI->eraseFromParent(); +  } +} + +/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ +  /*DEBUG*/(std::cerr << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI); +  const StructType *STy = cast<StructType>(MI->getAllocatedType()); + +  // There is guaranteed to be at least one use of the malloc (storing +  // it into GV).  If there are other uses, change them to be uses of +  // the global to simplify later code.  This also deletes the store +  // into GV. +  ReplaceUsesOfMallocWithGlobal(MI, GV); +   +  // Okay, at this point, there are no users of the malloc.  Insert N +  // new mallocs at the same place as MI, and N globals. +  std::vector<GlobalVariable*> FieldGlobals; +  std::vector<MallocInst*> FieldMallocs; +   +  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ +    const Type *FieldTy = STy->getElementType(FieldNo); +    const Type *PFieldTy = PointerType::get(FieldTy); +     +    GlobalVariable *NGV = +      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, +                         Constant::getNullValue(PFieldTy), +                         GV->getName() + ".f" + utostr(FieldNo), GV); +    FieldGlobals.push_back(NGV); +     +    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), +                                     MI->getName() + ".f" + utostr(FieldNo),MI); +    FieldMallocs.push_back(NMI); +    new StoreInst(NMI, NGV, MI); +  } +   +  // The tricky aspect of this transformation is handling the case when malloc +  // fails.  In the original code, malloc failing would set the result pointer +  // of malloc to null.  In this case, some mallocs could succeed and others +  // could fail.  As such, we emit code that looks like this: +  //    F0 = malloc(field0) +  //    F1 = malloc(field1) +  //    F2 = malloc(field2) +  //    if (F0 == 0 || F1 == 0 || F2 == 0) { +  //      if (F0) { free(F0); F0 = 0; } +  //      if (F1) { free(F1); F1 = 0; } +  //      if (F2) { free(F2); F2 = 0; } +  //    } +  Value *RunningOr = 0; +  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { +    Value *Cond = new SetCondInst(Instruction::SetEQ, FieldMallocs[i], +                             Constant::getNullValue(FieldMallocs[i]->getType()), +                                  "isnull", MI); +    if (!RunningOr) +      RunningOr = Cond;   // First seteq +    else +      RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI); +  } + +  // Split the basic block at the old malloc. +  BasicBlock *OrigBB = MI->getParent(); +  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); +   +  // Create the block to check the first condition.  Put all these blocks at the +  // end of the function as they are unlikely to be executed. +  BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null", +                                            OrigBB->getParent()); +   +  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond +  // branch on RunningOr. +  OrigBB->getTerminator()->eraseFromParent(); +  new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB); +   +  // Within the NullPtrBlock, we need to emit a comparison and branch for each +  // pointer, because some may be null while others are not. +  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { +    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); +    Value *Cmp = new SetCondInst(Instruction::SetNE, GVVal,  +                                 Constant::getNullValue(GVVal->getType()), +                                 "tmp", NullPtrBlock); +    BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent()); +    BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent()); +    new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock); + +    // Fill in FreeBlock. +    new FreeInst(GVVal, FreeBlock); +    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], +                  FreeBlock); +    new BranchInst(NextBlock, FreeBlock); +     +    NullPtrBlock = NextBlock; +  } +   +  new BranchInst(ContBB, NullPtrBlock); +   +   +  // MI is no longer needed, remove it. +  MI->eraseFromParent(); +   +  // Okay, the malloc site is completely handled.  All of the uses of GV are now +  // loads, and all uses of those loads are simple.  Rewrite them to use loads +  // of the per-field globals instead. +  while (!GV->use_empty()) { +    LoadInst *LI = cast<LoadInst>(GV->use_back()); +    RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals); +    LI->eraseFromParent(); +  } + +  // The old global is now dead, remove it. +  GV->eraseFromParent(); + +  ++NumHeapSRA; +  return FieldGlobals[0];  } +  // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge  // that only one value (besides its initializer) is ever stored to the global.  static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, @@ -835,23 +1062,52 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,        if (!MI->getAllocatedType()->isSized())          return false; +      // We can't optimize this global unless all uses of it are *known* to be +      // of the malloc value, not of the null initializer value (consider a use +      // that compares the global's value against zero to see if the malloc has +      // been reached).  To do this, we check to see if all uses of the global +      // would trap if the global were null: this proves that they must all +      // happen after the malloc. +      if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) +        return false; + +      // We can't optimize this if the malloc itself is used in a complex way, +      // for example, being stored into multiple globals.  This allows the +      // malloc to be stored into the specified global, loaded setcc'd, and +      // GEP'd.  These are all things we could transform to using the global +      // for. +      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) +        return false; + +              // If we have a global that is only initialized with a fixed size malloc, -      // and if all users of the malloc trap, and if the malloc'd address is not -      // put anywhere else, transform the program to use global memory instead -      // of malloc'd memory.  This eliminates dynamic allocation (good) and -      // exposes the resultant global to further GlobalOpt (even better).  Note -      // that we restrict this transformation to only working on small -      // allocations (2048 bytes currently), as we don't want to introduce a 16M -      // global or something. +      // transform the program to use global memory instead of malloc'd memory. +      // This eliminates dynamic allocation, avoids an indirection accessing the +      // data, and exposes the resultant global to further GlobalOpt.        if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { +        // Restrict this transformation to only working on small allocations +        // (2048 bytes currently), as we don't want to introduce a 16M global or +        // something.          if (NElements->getRawValue()* -                     TD.getTypeSize(MI->getAllocatedType()) < 2048 && -            AllUsesOfLoadedValueWillTrapIfNull(GV) && -            ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) { +                     TD.getTypeSize(MI->getAllocatedType()) < 2048) {            GVI = OptimizeGlobalAddressOfMalloc(GV, MI);            return true;          }        } + +      // If the allocation is an array of structures, consider transforming this +      // into multiple malloc'd arrays, one for each field.  This is basically +      // SRoA for malloc'd memory. +      if (const StructType *AllocTy =  +                  dyn_cast<StructType>(MI->getAllocatedType())) { +        // This the structure has an unreasonable number of fields, leave it +        // alone. +        if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 && +            GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) { +          GVI = PerformHeapAllocSRoA(GV, MI); +          return true; +        } +      }      }    } | 

