author     Gordon Henriksen <gordonhenriksen@mac.com>   2008-01-07 01:30:53 +0000
committer  Gordon Henriksen <gordonhenriksen@mac.com>   2008-01-07 01:30:53 +0000
commit     6047b6e14029e5dec3906745fd11c81676c108e9 (patch)
tree       508a232723d7f42f4cd62db1850807775b2e376a
parent     5180e8567502c8c2dbee52734e3a322b54a82577 (diff)
With this patch, the LowerGC transformation becomes the
ShadowStackCollector, which also has lower overhead with
no sacrifice in portability.
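For reference, the frame layout that makes this possible is the one declared in
this patch's runtime (llvm/runtime/GC/SemiSpace/semispace.c): each function with
roots allocates one frame record on its own stack, links it into a global chain
in the prologue, and unlinks it in the epilogue. A minimal C sketch of those
structures (field comments paraphrase the patch):

    #include <stdint.h>

    /* Constant per-function descriptor, emitted by the collector as the
       "__gc_fun"-style metadata shown below. */
    struct FrameMap {
      int32_t NumRoots;   /* Number of roots in the stack frame.            */
      int32_t NumMeta;    /* Number of metadata entries; may be < NumRoots. */
      const void *Meta[]; /* Metadata for the first NumMeta roots, if any.  */
    };

    /* One record per active frame, co-allocated with the frame's roots and
       threaded onto the global chain while the function is live. */
    struct StackEntry {
      struct StackEntry *Next;    /* Caller's stack entry.                    */
      const struct FrameMap *Map; /* The owning function's constant FrameMap. */
      void *Roots[];              /* The roots themselves, stored in place.   */
    };

    /* Head of the chain; the prologue pushes a StackEntry here and the
       epilogue pops it. */
    extern struct StackEntry *llvm_gc_root_chain;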
Considering a function @fun with 8 loop-local roots,
ShadowStackCollector introduces the following overhead
(x86):
; shadowstack prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    $___gc_fun, 20(%esp)
        movl    $0, 24(%esp)
        movl    $0, 28(%esp)
        movl    $0, 32(%esp)
        movl    $0, 36(%esp)
        movl    $0, 40(%esp)
        movl    $0, 44(%esp)
        movl    $0, 48(%esp)
        movl    $0, 52(%esp)
        movl    %ecx, 16(%esp)
        leal    16(%esp), %ecx
        movl    %ecx, (%eax)
; shadowstack loop overhead
        (none)
; shadowstack epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)
; shadowstack metadata
        .align  3
___gc_fun:                              # __gc_fun
        .long   8
        .space  4
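The two words under ___gc_fun are @fun's constant FrameMap: eight roots, none of
which carry metadata, so NumMeta is zero and the trailing Meta array is elided
entirely. In terms of the struct sketched above (gc_fun_map is an illustrative
name):

    /* C equivalent of the ___gc_fun descriptor emitted above. */
    static const struct FrameMap gc_fun_map = {
      .NumRoots = 8, /* .long  8                   */
      .NumMeta  = 0  /* .space 4 (four zero bytes) */
    };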
In comparison to LowerGC:
; lowergc prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    %ecx, 48(%esp)
        movl    $8, 52(%esp)
        movl    $0, 60(%esp)
        movl    $0, 56(%esp)
        movl    $0, 68(%esp)
        movl    $0, 64(%esp)
        movl    $0, 76(%esp)
        movl    $0, 72(%esp)
        movl    $0, 84(%esp)
        movl    $0, 80(%esp)
        movl    $0, 92(%esp)
        movl    $0, 88(%esp)
        movl    $0, 100(%esp)
        movl    $0, 96(%esp)
        movl    $0, 108(%esp)
        movl    $0, 104(%esp)
        movl    $0, 116(%esp)
        movl    $0, 112(%esp)
; lowergc loop overhead
        leal    44(%esp), %eax
        movl    %eax, 56(%esp)
        leal    40(%esp), %eax
        movl    %eax, 64(%esp)
        leal    36(%esp), %eax
        movl    %eax, 72(%esp)
        leal    32(%esp), %eax
        movl    %eax, 80(%esp)
        leal    28(%esp), %eax
        movl    %eax, 88(%esp)
        leal    24(%esp), %eax
        movl    %eax, 96(%esp)
        leal    20(%esp), %eax
        movl    %eax, 104(%esp)
        leal    16(%esp), %eax
        movl    %eax, 112(%esp)
; lowergc epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)
; lowergc metadata
        (none)
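For completeness, here is roughly how a runtime consumes the shadow stack: walk
llvm_gc_root_chain and visit every root in every frame, handing the metadata
pointer to the first NumMeta roots and null to the rest (the pass numbers roots
with metadata first for exactly this reason). This sketch mirrors the
llvm_cg_walk_gcroots update in this patch's semispace.c; visit_gc_roots and
Visitor are illustrative names, and it assumes the structs sketched earlier:

    #include <stddef.h>

    /* Call Visitor once for each live root in each active frame. */
    static void visit_gc_roots(void (*Visitor)(void **Root, const void *Meta)) {
      for (struct StackEntry *R = llvm_gc_root_chain; R != NULL; R = R->Next) {
        int i = 0;
        /* Roots with metadata come first in the frame. */
        for (; i < R->Map->NumMeta; ++i)
          Visitor(&R->Roots[i], R->Map->Meta[i]);
        /* Remaining roots have no metadata. */
        for (; i < R->Map->NumRoots; ++i)
          Visitor(&R->Roots[i], NULL);
      }
    }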
llvm-svn: 45670
-rw-r--r--  llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h |   3
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h                     |   1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar.h                 |   7
-rw-r--r--  llvm/lib/CodeGen/ShadowStackCollector.cpp             | 441
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerGC.cpp                | 350
-rw-r--r--  llvm/runtime/GC/SemiSpace/semispace.c                 |  32
-rw-r--r--  llvm/test/CodeGen/Generic/GC/redundant_init.ll        |  17
7 files changed, 478 insertions, 373 deletions
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h index 05d97cf9432..48be5411bcd 100644 --- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -17,6 +17,7 @@  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/Collectors.h"  namespace {    struct ForceCodegenLinking { @@ -35,6 +36,8 @@ namespace {        (void) llvm::createSimpleRegisterCoalescer(); +      (void) llvm::createShadowStackCollector(); +              (void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL);        (void) llvm::createTDRRListDAGScheduler(NULL, NULL, NULL);        (void) llvm::createTDListDAGScheduler(NULL, NULL, NULL); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 75ab81e83af..9555dbc76f3 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -84,7 +84,6 @@ namespace {        (void) llvm::createLoopRotatePass();        (void) llvm::createLoopIndexSplitPass();        (void) llvm::createLowerAllocationsPass(); -      (void) llvm::createLowerGCPass();        (void) llvm::createLowerInvokePass();        (void) llvm::createLowerPackedPass();        (void) llvm::createLowerSelectPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index f1a101f6028..4e3b21af6ca 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -296,13 +296,6 @@ extern const PassInfo *LowerInvokePassID;  //===----------------------------------------------------------------------===//  // -// LowerGCPass - This function returns an instance of the "lowergc" pass, which -// lowers garbage collection intrinsics to normal LLVM code. -// -FunctionPass *createLowerGCPass(); - -//===----------------------------------------------------------------------===// -//  // BlockPlacement - This pass reorders basic blocks in order to increase the  // number of fall-through conditional branches.  // diff --git a/llvm/lib/CodeGen/ShadowStackCollector.cpp b/llvm/lib/CodeGen/ShadowStackCollector.cpp new file mode 100644 index 00000000000..1b619c96680 --- /dev/null +++ b/llvm/lib/CodeGen/ShadowStackCollector.cpp @@ -0,0 +1,441 @@ +//===-- ShadowStackCollector.cpp - GC support for uncooperative targets ---===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics for targets that do +// not natively support them (which includes the C backend). Note that the code +// generated is not quite as efficient as collectors which generate stack maps +// to identify roots. +// +// This pass implements the code transformation described in this paper: +//   "Accurate Garbage Collection in an Uncooperative Environment" +//   Fergus Henderson, ISMM, 2002 +// +// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with +// this collector. +// +// In order to support this particular transformation, all stack roots are +// coallocated in the stack. This allows a fully target-independent stack map +// while introducing only minor runtime overhead. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "shadowstackgc" +#include "llvm/CodeGen/Collectors.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Collector.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/LLVMBuilder.h" +#include "llvm/Analysis/Verifier.h" +#include <cstdlib> + +using namespace llvm; + +namespace { +   +  class VISIBILITY_HIDDEN ShadowStackCollector : public Collector { +    /// RootChain - This is the global linked-list that contains the chain of GC +    /// roots. +    GlobalVariable *Head; +     +    /// StackEntryTy - Abstract type of a link in the shadow stack. +    ///  +    const StructType *StackEntryTy; +     +    /// Roots - GC roots in the current function. Each is a pair of the +    /// intrinsic call and its corresponding alloca. +    std::vector<std::pair<CallInst*,AllocaInst*> > Roots; +     +  public: +    ShadowStackCollector(); +     +    bool initializeCustomLowering(Module &M); +    bool performCustomLowering(Function &F); +     +  private: +    bool IsNullValue(Value *V); +    Constant *GetFrameMap(Function &F); +    const Type* GetConcreteStackEntryType(Function &F); +    void CollectRoots(Function &F); +    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr, +                                        int Idx1, const char *Name); +    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr, +                                        int Idx1, int Idx2, const char *Name); +  }; +   +  CollectorRegistry::Add<ShadowStackCollector> +  Y("shadow-stack", +    "Very portable collector for uncooperative code generators"); +   +  /// EscapeEnumerator - This is a little algorithm to find all escape points +  /// from a function so that "finally"-style code can be inserted. In addition +  /// to finding the existing return and unwind instructions, it also (if +  /// necessary) transforms any call instructions into invokes and sends them to +  /// a landing pad. +  ///  +  /// It's wrapped up in a state machine using the same transform C# uses for +  /// 'yield return' enumerators, This transform allows it to be non-allocating. +  class VISIBILITY_HIDDEN EscapeEnumerator { +    Function &F; +    const char *CleanupBBName; +     +    // State. +    int State; +    Function::iterator StateBB, StateE; +    LLVMBuilder Builder; +     +  public: +    EscapeEnumerator(Function &F, const char *N = "cleanup") +      : F(F), CleanupBBName(N), State(0) {} +     +    LLVMBuilder *Next() { +      switch (State) { +      default: +        return 0; +         +      case 0: +        StateBB = F.begin(); +        StateE = F.end(); +        State = 1; +         +      case 1: +        // Find all 'return' and 'unwind' instructions. +        while (StateBB != StateE) { +          BasicBlock *CurBB = StateBB++; +           +          // Branches and invokes do not escape, only unwind and return do. +          TerminatorInst *TI = CurBB->getTerminator(); +          if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI)) +            continue; +           +          Builder.SetInsertPoint(TI->getParent(), TI); +          return &Builder; +        } +         +        State = 2; +         +        // Find all 'call' instructions. 
+        SmallVector<Instruction*,16> Calls; +        for (Function::iterator BB = F.begin(), +                                E = F.end(); BB != E; ++BB) +          for (BasicBlock::iterator II = BB->begin(), +                                    EE = BB->end(); II != EE; ++II) +            if (CallInst *CI = dyn_cast<CallInst>(II)) +              if (!CI->getCalledFunction() || +                  !CI->getCalledFunction()->getIntrinsicID()) +                Calls.push_back(CI); +         +        if (Calls.empty()) +          return 0; +         +        // Create a cleanup block. +        BasicBlock *CleanupBB = new BasicBlock(CleanupBBName, &F); +        UnwindInst *UI = new UnwindInst(CleanupBB); +         +        // Transform the 'call' instructions into 'invoke's branching to the +        // cleanup block. Go in reverse order to make prettier BB names. +        SmallVector<Value*,16> Args; +        for (unsigned I = Calls.size(); I != 0; ) { +          CallInst *CI = cast<CallInst>(Calls[--I]); +           +          // Split the basic block containing the function call. +          BasicBlock *CallBB = CI->getParent(); +          BasicBlock *NewBB = +            CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); +           +          // Remove the unconditional branch inserted at the end of CallBB. +          CallBB->getInstList().pop_back(); +          NewBB->getInstList().remove(CI); +           +          // Create a new invoke instruction. +          Args.clear(); +          Args.append(CI->op_begin() + 1, CI->op_end()); +           +          InvokeInst *II = new InvokeInst(CI->getOperand(0), +                                          NewBB, CleanupBB, +                                          Args.begin(), Args.end(), +                                          CI->getName(), CallBB); +          II->setCallingConv(CI->getCallingConv()); +          II->setParamAttrs(CI->getParamAttrs()); +          CI->replaceAllUsesWith(II); +          delete CI; +        } +         +        Builder.SetInsertPoint(UI->getParent(), UI); +        return &Builder; +      } +    } +  }; + +} + +// ----------------------------------------------------------------------------- + +Collector *llvm::createShadowStackCollector() { +  return new ShadowStackCollector(); +} + +ShadowStackCollector::ShadowStackCollector() : Head(0), StackEntryTy(0) { +  InitRoots = true; +  CustomRoots = true; +} + +Constant *ShadowStackCollector::GetFrameMap(Function &F) { +  // doInitialization creates the abstract type of this value. +   +  Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty); +   +  // Truncate the ShadowStackDescriptor if some metadata is null. 
+  unsigned NumMeta = 0; +  SmallVector<Constant*,16> Metadata; +  for (unsigned I = 0; I != Roots.size(); ++I) { +    Constant *C = cast<Constant>(Roots[I].first->getOperand(2)); +    if (!C->isNullValue()) +      NumMeta = I + 1; +    Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); +  } +   +  Constant *BaseElts[] = { +    ConstantInt::get(Type::Int32Ty, Roots.size(), false), +    ConstantInt::get(Type::Int32Ty, NumMeta, false), +  }; +   +  Constant *DescriptorElts[] = { +    ConstantStruct::get(BaseElts, 2), +    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), +                       Metadata.begin(), NumMeta) +  }; +   +  Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2); +   +  std::string TypeName("gc_map."); +  TypeName += utostr(NumMeta); +  F.getParent()->addTypeName(TypeName, FrameMap->getType()); +   +  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems +  //        that, short of multithreaded LLVM, it should be safe; all that is +  //        necessary is that a simple Module::iterator loop not be invalidated. +  //        Appending to the GlobalVariable list is safe in that sense. +  //  +  //        All of the output passes emit globals last. The ExecutionEngine +  //        explicitly supports adding globals to the module after +  //        initialization. +  //  +  //        Still, if it isn't deemed acceptable, then this transformation needs +  //        to be a ModulePass (which means it cannot be in the 'llc' pipeline +  //        (which uses a FunctionPassManager (which segfaults (not asserts) if +  //        provided a ModulePass))). +  Constant *GV = new GlobalVariable(FrameMap->getType(), true, +                                    GlobalVariable::InternalLinkage, +                                    FrameMap, "__gc_" + F.getName(), +                                    F.getParent()); +   +  Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0), +                              ConstantInt::get(Type::Int32Ty, 0) }; +  return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); +} + +const Type* ShadowStackCollector::GetConcreteStackEntryType(Function &F) { +  // doInitialization creates the generic version of this type. +  std::vector<const Type*> EltTys; +  EltTys.push_back(StackEntryTy); +  for (size_t I = 0; I != Roots.size(); I++) +    EltTys.push_back(Roots[I].second->getAllocatedType()); +  Type *Ty = StructType::get(EltTys); +   +  std::string TypeName("gc_stackentry."); +  TypeName += F.getName(); +  F.getParent()->addTypeName(TypeName, Ty); +   +  return Ty; +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackCollector::initializeCustomLowering(Module &M) { +  // struct FrameMap { +  //   int32_t NumRoots; // Number of roots in stack frame. +  //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots. +  //   void *Meta[];     // May be absent for roots without metadata. +  // }; +  std::vector<const Type*> EltTys; +  EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) +  EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. +  StructType *FrameMapTy = StructType::get(EltTys); +  M.addTypeName("gc_map", FrameMapTy); +  PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); +   +  // struct StackEntry { +  //   ShadowStackEntry *Next; // Caller's stack entry. +  //   FrameMap *Map;          // Pointer to constant FrameMap. 
+  //   void *Roots[];          // Stack roots (in-place array, so we pretend). +  // }; +  OpaqueType *RecursiveTy = OpaqueType::get(); +   +  EltTys.clear(); +  EltTys.push_back(PointerType::getUnqual(RecursiveTy)); +  EltTys.push_back(FrameMapPtrTy); +  PATypeHolder LinkTyH = StructType::get(EltTys); +   +  RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); +  StackEntryTy = cast<StructType>(LinkTyH.get()); +  const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); +  M.addTypeName("gc_stackentry", LinkTyH.get());  // FIXME: Is this safe from +                                                  //        a FunctionPass? +   +  // Get the root chain if it already exists. +  Head = M.getGlobalVariable("llvm_gc_root_chain"); +  if (!Head) { +    // If the root chain does not exist, insert a new one with linkonce +    // linkage! +    Head = new GlobalVariable(StackEntryPtrTy, false, +                              GlobalValue::LinkOnceLinkage, +                              Constant::getNullValue(StackEntryPtrTy), +                              "llvm_gc_root_chain", &M); +  } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { +    Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); +    Head->setLinkage(GlobalValue::LinkOnceLinkage); +  } +   +  return true; +} + +bool ShadowStackCollector::IsNullValue(Value *V) { +  if (Constant *C = dyn_cast<Constant>(V)) +    return C->isNullValue(); +  return false; +} + +void ShadowStackCollector::CollectRoots(Function &F) { +  // FIXME: Account for original alignment. Could fragment the root array. +  //   Approach 1: Null initialize empty slots at runtime. Yuck. +  //   Approach 2: Emit a map of the array instead of just a count. +   +  assert(Roots.empty() && "Not cleaned up?"); +   +  SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots; +   +  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) +    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) +      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) +        if (Function *F = CI->getCalledFunction()) +          if (F->getIntrinsicID() == Intrinsic::gcroot) { +            std::pair<CallInst*,AllocaInst*> Pair = std::make_pair( +              CI, cast<AllocaInst>( +                    IntrinsicInst::StripPointerCasts(CI->getOperand(1)))); +            if (IsNullValue(CI->getOperand(2))) +              Roots.push_back(Pair); +            else +              MetaRoots.push_back(Pair); +          } +   +  // Number roots with metadata (usually empty) at the beginning, so that the +  // FrameMap::Meta array can be elided. +  Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr, +                                int Idx, int Idx2, const char *Name) { +  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), +                       ConstantInt::get(Type::Int32Ty, Idx), +                       ConstantInt::get(Type::Int32Ty, Idx2) }; +  return B.CreateGEP(BasePtr, Indices, Indices + 3, Name); +} + +GetElementPtrInst * +ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr, +                                int Idx, const char *Name) { +  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), +                       ConstantInt::get(Type::Int32Ty, Idx) }; +  return B.CreateGEP(BasePtr, Indices, Indices + 2, Name); +} + +/// runOnFunction - Insert code to maintain the shadow stack. 
+bool ShadowStackCollector::performCustomLowering(Function &F) { +  // Find calls to llvm.gcroot. +  CollectRoots(F); +   +  // If there are no roots in this function, then there is no need to add a +  // stack map entry for it. +  if (Roots.empty()) +    return false; +   +  // Build the constant map and figure the type of the shadow stack entry. +  Value *FrameMap = GetFrameMap(F); +  const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); +   +  // Build the shadow stack entry at the very start of the function. +  BasicBlock::iterator IP = F.getEntryBlock().begin(); +  LLVMBuilder AtEntry(IP->getParent(), IP); +   +  Instruction *StackEntry   = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, +                                                   "gc_frame"); +   +  while (isa<AllocaInst>(IP)) ++IP; +  AtEntry.SetInsertPoint(IP->getParent(), IP); +   +  // Initialize the map pointer and load the current head of the shadow stack. +  Instruction *CurrentHead  = AtEntry.CreateLoad(Head, "gc_currhead"); +  Instruction *EntryMapPtr  = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); +                              AtEntry.CreateStore(FrameMap, EntryMapPtr); +   +  // After all the allocas... +  for (unsigned I = 0, E = Roots.size(); I != E; ++I) { +    // For each root, find the corresponding slot in the aggregate... +    Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); +     +    // And use it in lieu of the alloca. +    AllocaInst *OriginalAlloca = Roots[I].second; +    SlotPtr->takeName(OriginalAlloca); +    OriginalAlloca->replaceAllUsesWith(SlotPtr); +  } +   +  // Move past the original stores inserted by Collector::InitRoots. This isn't +  // really necessary (the collector would never see the intermediate state), +  // but it's nicer not to push the half-initialized entry onto the stack. +  while (isa<StoreInst>(IP)) ++IP; +  AtEntry.SetInsertPoint(IP->getParent(), IP); +   +  // Push the entry onto the shadow stack. +  Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); +  Instruction *NewHeadVal   = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); +                              AtEntry.CreateStore(CurrentHead, EntryNextPtr); +                              AtEntry.CreateStore(NewHeadVal, Head); +   +  // For each instruction that escapes... +  EscapeEnumerator EE(F, "gc_cleanup"); +  while (LLVMBuilder *AtExit = EE.Next()) { +    // Pop the entry from the shadow stack. Don't reuse CurrentHead from +    // AtEntry, since that would make the value live for the entire function. +    Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0, +                                           "gc_frame.next"); +    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); +                       AtExit->CreateStore(SavedHead, Head); +  } +   +  // Delete the original allocas (which are no longer used) and the intrinsic +  // calls (which are no longer valid). Doing this last avoids invalidating +  // iterators. 
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) { +    Roots[I].first->eraseFromParent(); +    Roots[I].second->eraseFromParent(); +  } +   +  F.dump(); +   +  Roots.clear(); +  return true; +} diff --git a/llvm/lib/Transforms/Scalar/LowerGC.cpp b/llvm/lib/Transforms/Scalar/LowerGC.cpp index 89749862d24..e69de29bb2d 100644 --- a/llvm/lib/Transforms/Scalar/LowerGC.cpp +++ b/llvm/lib/Transforms/Scalar/LowerGC.cpp @@ -1,350 +0,0 @@ -//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements lowering for the llvm.gc* intrinsics for targets that do -// not natively support them (which includes the C backend).  Note that the code -// generated is not as efficient as it would be for targets that natively -// support the GC intrinsics, but it is useful for getting new targets -// up-and-running quickly. -// -// This pass implements the code transformation described in this paper: -//   "Accurate Garbage Collection in an Uncooperative Environment" -//   Fergus Henderson, ISMM, 2002 -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lowergc" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -namespace { -  class VISIBILITY_HIDDEN LowerGC : public FunctionPass { -    /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the -    /// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics. -    Function *GCRootInt, *GCReadInt, *GCWriteInt; - -    /// GCRead/GCWrite - These are the functions provided by the garbage -    /// collector for read/write barriers. -    Constant *GCRead, *GCWrite; - -    /// RootChain - This is the global linked-list that contains the chain of GC -    /// roots. -    GlobalVariable *RootChain; - -    /// MainRootRecordType - This is the type for a function root entry if it -    /// had zero roots. -    const Type *MainRootRecordType; -  public: -    static char ID; // Pass identification, replacement for typeid -    LowerGC() : FunctionPass((intptr_t)&ID),  -                GCRootInt(0), GCReadInt(0), GCWriteInt(0), -                GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {} -    virtual bool doInitialization(Module &M); -    virtual bool runOnFunction(Function &F); - -  private: -    const StructType *getRootRecordType(unsigned NumRoots); -  }; - -  char LowerGC::ID = 0; -  RegisterPass<LowerGC> -  X("lowergc", "Lower GC intrinsics, for GCless code generators"); -} - -/// createLowerGCPass - This function returns an instance of the "lowergc" -/// pass, which lowers garbage collection intrinsics to normal LLVM code. -FunctionPass *llvm::createLowerGCPass() { -  return new LowerGC(); -} - -/// getRootRecordType - This function creates and returns the type for a root -/// record containing 'NumRoots' roots. -const StructType *LowerGC::getRootRecordType(unsigned NumRoots) { -  // Build a struct that is a type used for meta-data/root pairs. 
-  std::vector<const Type *> ST; -  ST.push_back(GCRootInt->getFunctionType()->getParamType(0)); -  ST.push_back(GCRootInt->getFunctionType()->getParamType(1)); -  StructType *PairTy = StructType::get(ST); - -  // Build the array of pairs. -  ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots); - -  // Now build the recursive list type. -  PATypeHolder RootListH = -    MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get(); -  ST.clear(); -  ST.push_back(PointerType::getUnqual(RootListH));         // Prev pointer -  ST.push_back(Type::Int32Ty);                       // NumElements in array -  ST.push_back(PairArrTy);                           // The pairs -  StructType *RootList = StructType::get(ST); -  if (MainRootRecordType) -    return RootList; - -  assert(NumRoots == 0 && "The main struct type should have zero entries!"); -  cast<OpaqueType>((Type*)RootListH.get())->refineAbstractTypeTo(RootList); -  MainRootRecordType = RootListH; -  return cast<StructType>(RootListH.get()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now.  If -/// not, this pass does not do anything. -bool LowerGC::doInitialization(Module &M) { -  GCRootInt  = M.getFunction("llvm.gcroot"); -  GCReadInt  = M.getFunction("llvm.gcread"); -  GCWriteInt = M.getFunction("llvm.gcwrite"); -  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - -  PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty); -  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - -  // If the program is using read/write barriers, find the implementations of -  // them from the GC runtime library. -  if (GCReadInt)        // Make:  sbyte* %llvm_gc_read(sbyte**) -    GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, VoidPtrPtr, -                                   (Type *)0); -  if (GCWriteInt)       // Make:  void %llvm_gc_write(sbyte*, sbyte**) -    GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy, -                                    VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0); - -  // If the program has GC roots, get or create the global root list. -  if (GCRootInt) { -    const StructType *RootListTy = getRootRecordType(0); -    const Type *PRLTy = PointerType::getUnqual(RootListTy); -    M.addTypeName("llvm_gc_root_ty", RootListTy); - -    // Get the root chain if it already exists. -    RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy); -    if (RootChain == 0) { -      // If the root chain does not exist, insert a new one with linkonce -      // linkage! -      RootChain = new GlobalVariable(PRLTy, false, -                                     GlobalValue::LinkOnceLinkage, -                                     Constant::getNullValue(PRLTy), -                                     "llvm_gc_root_chain", &M); -    } else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) { -      RootChain->setInitializer(Constant::getNullValue(PRLTy)); -      RootChain->setLinkage(GlobalValue::LinkOnceLinkage); -    } -  } -  return true; -} - -/// Coerce - If the specified operand number of the specified instruction does -/// not have the specified type, insert a cast. Note that this only uses BitCast -/// because the types involved are all pointers. 
-static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) { -  if (I->getOperand(OpNum)->getType() != Ty) { -    if (Constant *C = dyn_cast<Constant>(I->getOperand(OpNum))) -      I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty)); -    else { -      CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I); -      I->setOperand(OpNum, CI); -    } -  } -} - -/// runOnFunction - If the program is using GC intrinsics, replace any -/// read/write intrinsics with the appropriate read/write barrier calls, then -/// inline them.  Finally, build the data structures for -bool LowerGC::runOnFunction(Function &F) { -  // Quick exit for programs that are not using GC mechanisms. -  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - -  PointerType *VoidPtr    = PointerType::getUnqual(Type::Int8Ty); -  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - -  // If there are read/write barriers in the program, perform a quick pass over -  // the function eliminating them.  While we are at it, remember where we see -  // calls to llvm.gcroot. -  std::vector<CallInst*> GCRoots; -  std::vector<CallInst*> NormalCalls; - -  bool MadeChange = false; -  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) -    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) -      if (CallInst *CI = dyn_cast<CallInst>(II++)) { -        if (!CI->getCalledFunction() || -            !CI->getCalledFunction()->isIntrinsic()) -          NormalCalls.push_back(CI);   // Remember all normal function calls. - -        if (Function *F = CI->getCalledFunction()) -          if (F == GCRootInt) -            GCRoots.push_back(CI); -          else if (F == GCReadInt || F == GCWriteInt) { -            if (F == GCWriteInt) { -              // Change a llvm.gcwrite call to call llvm_gc_write instead. -              CI->setOperand(0, GCWrite); -              // Insert casts of the operands as needed. -              Coerce(CI, 1, VoidPtr); -              Coerce(CI, 2, VoidPtr); -              Coerce(CI, 3, VoidPtrPtr); -            } else { -              Coerce(CI, 1, VoidPtr); -              Coerce(CI, 2, VoidPtrPtr); -              if (CI->getType() == VoidPtr) { -                CI->setOperand(0, GCRead); -              } else { -                // Create a whole new call to replace the old one. -                 -                // It sure would be nice to pass op_begin()+1, -                // op_begin()+2 but it runs into trouble with -                // CallInst::init's &*iterator, which requires a -                // conversion from Use* to Value*.  The conversion -                // from Use to Value * is not useful because the -                // memory for Value * won't be contiguous. -                Value* Args[] = { -                  CI->getOperand(1), -                  CI->getOperand(2)  -                }; -                CallInst *NC = new CallInst(GCRead, Args, Args + 2, -                                            CI->getName(), CI); -                // These functions only deal with ptr type results so BitCast -                // is the correct kind of cast (no-op cast). -                Value *NV = new BitCastInst(NC, CI->getType(), "", CI); -                CI->replaceAllUsesWith(NV); -                BB->getInstList().erase(CI); -                CI = NC; -              } -            } - -            MadeChange = true; -          } -      } - -  // If there are no GC roots in this function, then there is no need to create -  // a GC list record for it. 
-  if (GCRoots.empty()) return MadeChange; - -  // Okay, there are GC roots in this function.  On entry to the function, add a -  // record to the llvm_gc_root_chain, and remove it on exit. - -  // Create the alloca, and zero it out. -  const StructType *RootListTy = getRootRecordType(GCRoots.size()); -  AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", F.begin()->begin()); - -  // Insert the memset call after all of the allocas in the function. -  BasicBlock::iterator IP = AI; -  while (isa<AllocaInst>(IP)) ++IP; - -  Constant *Zero = ConstantInt::get(Type::Int32Ty, 0); -  Constant *One  = ConstantInt::get(Type::Int32Ty, 1); - -  Value *Idx[2] = { Zero, Zero }; -   -  // Get a pointer to the prev pointer. -  Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2, -                                            "prevptrptr", IP); - -  // Load the previous pointer. -  Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP); -  // Store the previous pointer into the prevptrptr -  new StoreInst(PrevPtr, PrevPtrPtr, IP); - -  // Set the number of elements in this record. -  Idx[1] = One; -  Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2, -                                            "numeltsptr", IP); -  new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), NumEltsPtr,IP); - -  Value* Par[4]; -  Par[0] = Zero; -  Par[1] = ConstantInt::get(Type::Int32Ty, 2); - -  const PointerType *PtrLocTy = -    cast<PointerType>(GCRootInt->getFunctionType()->getParamType(0)); -  Constant *Null = ConstantPointerNull::get(PtrLocTy); - -  // Initialize all of the gcroot records now. -  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) { -    // Initialize the meta-data pointer. -    Par[2] = ConstantInt::get(Type::Int32Ty, i); -    Par[3] = One; -    Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4, -                                               "MetaDataPtr", IP); -    assert(isa<Constant>(GCRoots[i]->getOperand(2)) && "Must be a constant"); -    new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP); - -    // Initialize the root pointer to null on entry to the function. -    Par[3] = Zero; -    Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4, -                                              "RootEntPtr", IP); -    new StoreInst(Null, RootPtrPtr, IP); - -    // Each occurrance of the llvm.gcroot intrinsic now turns into an -    // initialization of the slot with the address. -    new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]); -  } - -  // Now that the record is all initialized, store the pointer into the global -  // pointer. -  Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType), "", IP); -  new StoreInst(C, RootChain, IP); - -  // Eliminate all the gcroot records now. -  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) -    GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]); - -  // On exit from the function we have to remove the entry from the GC root -  // chain.  Doing this is straight-forward for return and unwind instructions: -  // just insert the appropriate copy. -  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) -    if (isa<UnwindInst>(BB->getTerminator()) || -        isa<ReturnInst>(BB->getTerminator())) { -      // We could reuse the PrevPtr loaded on entry to the function, but this -      // would make the value live for the whole function, which is probably a -      // bad idea.  Just reload the value out of our stack entry. 
-      PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator()); -      new StoreInst(PrevPtr, RootChain, BB->getTerminator()); -    } - -  // If an exception is thrown from a callee we have to make sure to -  // unconditionally take the record off the stack.  For this reason, we turn -  // all call instructions into invoke whose cleanup pops the entry off the -  // stack.  We only insert one cleanup block, which is shared by all invokes. -  if (!NormalCalls.empty()) { -    // Create the shared cleanup block. -    BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F); -    UnwindInst *UI = new UnwindInst(Cleanup); -    PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI); -    new StoreInst(PrevPtr, RootChain, UI); - -    // Loop over all of the function calls, turning them into invokes. -    while (!NormalCalls.empty()) { -      CallInst *CI = NormalCalls.back(); -      BasicBlock *CBB = CI->getParent(); -      NormalCalls.pop_back(); - -      // Split the basic block containing the function call. -      BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont"); - -      // Remove the unconditional branch inserted at the end of the CBB. -      CBB->getInstList().pop_back(); -      NewBB->getInstList().remove(CI); - -      // Create a new invoke instruction. -      std::vector<Value*> Args(CI->op_begin()+1, CI->op_end()); - -      Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup, -                                 Args.begin(), Args.end(), CI->getName(), CBB); -      cast<InvokeInst>(II)->setCallingConv(CI->getCallingConv()); -      cast<InvokeInst>(II)->setParamAttrs(CI->getParamAttrs()); -      CI->replaceAllUsesWith(II); -      delete CI; -    } -  } - -  return true; -} diff --git a/llvm/runtime/GC/SemiSpace/semispace.c b/llvm/runtime/GC/SemiSpace/semispace.c index b8ef188fed6..40af1cb2e37 100644 --- a/llvm/runtime/GC/SemiSpace/semispace.c +++ b/llvm/runtime/GC/SemiSpace/semispace.c @@ -97,24 +97,26 @@ void llvm_gc_write(void *V, void *ObjPtr, void **FieldPtr) { *FieldPtr = V; }   * FIXME: This should be in a code-generator specific library, but for now this   * will work for all code generators.   */ -typedef struct GCRoot { -  void **RootPtr; -  void *Meta; -} GCRoot; - -typedef struct GCRoots { -  struct GCRoots *Next; -  unsigned NumRoots; -  GCRoot RootRecords[]; -} GCRoots; -GCRoots *llvm_gc_root_chain; +struct FrameMap { +  int32_t NumRoots; // Number of roots in stack frame. +  int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots. +  void *Meta[];     // May be absent for roots without metadata. +}; + +struct StackEntry { +  ShadowStackEntry *Next; // Caller's stack entry. +  const FrameMap *Map;    // Pointer to constant FrameMap. +  void *Roots[];          // Stack roots (in-place array). +}; +StackEntry *llvm_gc_root_chain;  void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta)) { -  GCRoots *R = llvm_gc_root_chain; -  for (; R; R = R->Next) { +  for (StackEntry *R; R; R = R->Next) {      unsigned i, e; -    for (i = 0, e = R->NumRoots; i != e; ++i) -      FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta); +    for (i = 0, e = R->NumMeta; i != e; ++i) +      FP(&R->Roots[i], R->Map->Meta[i]); +    for (e = R->NumRoots; i != e; ++i) +      FP(&R->Roots[i], NULL);    }  }  /* END FIXME! 
 */
diff --git a/llvm/test/CodeGen/Generic/GC/redundant_init.ll b/llvm/test/CodeGen/Generic/GC/redundant_init.ll
new file mode 100644
index 00000000000..44996034748
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/GC/redundant_init.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN:   ignore grep {movl..0} | count 0
+
+%struct.obj = type { i8*, %struct.obj* }
+
+declare void @g() gc "shadow-stack"
+
+define void @f(i8* %o) gc "shadow-stack" {
+entry:
+	%root = alloca i8*
+	call void @llvm.gcroot(i8** %root, i8* null)
+	store i8* %o, i8** %root
+	call void @g()
+	ret void
+}
+
+declare void @llvm.gcroot(i8**, i8*)

