diff options
| author | Kostya Serebryany <kcc@google.com> | 2012-08-21 08:24:25 +0000 | 
|---|---|---|
| committer | Kostya Serebryany <kcc@google.com> | 2012-08-21 08:24:25 +0000 | 
| commit | f4be019fba4c666087fb1784144b961ab5ef5d1c (patch) | |
| tree | 37541e24829cfafb96c66dbdf7a18df0d5a4556d /llvm | |
| parent | bfcfdeb563d02d494e43268f72da4edf6c2bfe4e (diff) | |
| download | bcm5719-llvm-f4be019fba4c666087fb1784144b961ab5ef5d1c.tar.gz bcm5719-llvm-f4be019fba4c666087fb1784144b961ab5ef5d1c.zip  | |
[asan] Add code to detect the global initialization order fiasco in C/C++. The sub-pass is off by default for now. Patch by Reid Watson. Note: this patch changes the interface between the LLVM and compiler-rt parts of asan. The corresponding patch to compiler-rt will follow.
llvm-svn: 162268
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 234 | ||||
| -rw-r--r-- | llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll | 36 | 
2 files changed, 207 insertions, 63 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 17b83ceee19..06f4d2fedd6 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -61,6 +61,8 @@ static const int   kAsanCtorAndCtorPriority = 1;  static const char *kAsanReportErrorTemplate = "__asan_report_";  static const char *kAsanRegisterGlobalsName = "__asan_register_globals";  static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";  static const char *kAsanInitName = "__asan_init";  static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";  static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",  // This flag may need to be replaced with -f[no]asan-globals.  static cl::opt<bool> ClGlobals("asan-globals",         cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", +       cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));  static cl::opt<bool> ClMemIntrin("asan-memintrin",         cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));  // This flag may need to be replaced with -fasan-blacklist. 
@@ -171,6 +175,8 @@ struct AddressSanitizer : public ModulePass {                                     Instruction *InsertBefore, bool IsWrite);    Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);    bool handleFunction(Module &M, Function &F); +  void createInitializerPoisonCalls(Module &M, +                                    Value *FirstAddr, Value *LastAddr);    bool maybeInsertAsanInitAtFunctionEntry(Function &F);    bool poisonStackInFunction(Module &M, Function &F);    virtual bool runOnModule(Module &M); @@ -178,7 +184,6 @@ struct AddressSanitizer : public ModulePass {    static char ID;  // Pass identification, replacement for typeid   private: -    uint64_t getAllocaSizeInBytes(AllocaInst *AI) {      Type *Ty = AI->getAllocatedType();      uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -194,9 +199,12 @@ struct AddressSanitizer : public ModulePass {    }    Function *checkInterfaceFunction(Constant *FuncOrBitcast); +  bool ShouldInstrumentGlobal(GlobalVariable *G);    void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,                     Value *ShadowBase, bool DoPoison);    bool LooksLikeCodeInBug11395(Instruction *I); +  void FindDynamicInitializers(Module &M); +  bool HasDynamicInitializer(GlobalVariable *G);    LLVMContext *C;    TargetData *TD; @@ -213,6 +221,7 @@ struct AddressSanitizer : public ModulePass {    // This array is indexed by AccessIsWrite and log2(AccessSize).    Function *AsanErrorCallback[2][kNumberOfAccessSizes];    InlineAsm *EmptyAsm; +  SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;  };  }  // namespace @@ -358,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {    return NULL;  } +void AddressSanitizer::FindDynamicInitializers(Module& M) { +  // Clang generates metadata identifying all dynamically initialized globals. 
+  NamedMDNode *DynamicGlobals = +      M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); +  if (!DynamicGlobals) +    return; +  for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { +    MDNode *MDN = DynamicGlobals->getOperand(i); +    assert(MDN->getNumOperands() == 1); +    Value *VG = MDN->getOperand(0); +    // The optimizer may optimize away a global entirely, in which case we +    // cannot instrument access to it. +    if (!VG) +      continue; + +    GlobalVariable *G = cast<GlobalVariable>(VG); +    DynamicallyInitializedGlobals.insert(G); +  } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { +  return DynamicallyInitializedGlobals.count(G); +} +  void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {    bool IsWrite;    Value *Addr = isInterestingMemoryAccess(I, &IsWrite);    assert(Addr); -  if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { -    // We are accessing a global scalar variable. Nothing to catch here. -    return; +  if (ClOpt && ClOptGlobals) { +    if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { +      // If initialization order checking is disabled, a simple access to a +      // dynamically initialized global is always valid. +      if (!ClInitializers) +        return; +      // If a global variable does not have dynamic initialization we don't +      // have to instrument it.  However, if a global has external linkage, we +      // assume it has dynamic initialization, as it may have an initializer +      // in a different TU. 
+      if (G->getLinkage() != GlobalVariable::ExternalLinkage && +          !HasDynamicInitializer(G)) +        return; +    }    } +    Type *OrigPtrTy = Addr->getType();    Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -462,68 +507,106 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,    Crash->setDebugLoc(OrigIns->getDebugLoc());  } +void AddressSanitizer::createInitializerPoisonCalls(Module &M, +                                                    Value *FirstAddr, +                                                    Value *LastAddr) { +  // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. +  Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); +  // If that function is not present, this TU contains no globals, or they have +  // all been optimized away +  if (!GlobalInit) +    return; + +  // Set up the arguments to our poison/unpoison functions. +  IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + +  // Declare our poisoning and unpoisoning functions. +  Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( +      kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); +  AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); +  Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( +      kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); +  AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + +  // Add a call to poison all external globals before the given function starts. +  IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + +  // Add calls to unpoison all globals before each return instruction. 
+  for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); +      I != E; ++I) { +    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { +      CallInst::Create(AsanUnpoisonGlobals, "", RI); +    } +  } +} + +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { +  Type *Ty = cast<PointerType>(G->getType())->getElementType(); +  DEBUG(dbgs() << "GLOBAL: " << *G); + +  if (!Ty->isSized()) return false; +  if (!G->hasInitializer()) return false; +  // Touch only those globals that will not be defined in other modules. +  // Don't handle ODR type linkages since other modules may be built w/o asan. +  if (G->getLinkage() != GlobalVariable::ExternalLinkage && +      G->getLinkage() != GlobalVariable::PrivateLinkage && +      G->getLinkage() != GlobalVariable::InternalLinkage) +    return false; +  // Two problems with thread-locals: +  //   - The address of the main thread's copy can't be computed at link-time. +  //   - Need to poison all copies, not just the main thread's one. +  if (G->isThreadLocal()) +    return false; +  // For now, just ignore this Alloca if the alignment is large. +  if (G->getAlignment() > RedzoneSize) return false; + +  // Ignore all the globals with the names starting with "\01L_OBJC_". +  // Many of those are put into the .cstring section. The linker compresses +  // that section by removing the spare \0s after the string terminator, so +  // our redzones get broken. +  if ((G->getName().find("\01L_OBJC_") == 0) || +      (G->getName().find("\01l_OBJC_") == 0)) { +    DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); +    return false; +  } + +  if (G->hasSection()) { +    StringRef Section(G->getSection()); +    // Ignore the globals from the __OBJC section. The ObjC runtime assumes +    // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to +    // them. 
+    if ((Section.find("__OBJC,") == 0) || +        (Section.find("__DATA, __objc_") == 0)) { +      DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); +      return false; +    } +    // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 +    // Constant CFString instances are compiled in the following way: +    //  -- the string buffer is emitted into +    //     __TEXT,__cstring,cstring_literals +    //  -- the constant NSConstantString structure referencing that buffer +    //     is placed into __DATA,__cfstring +    // Therefore there's no point in placing redzones into __DATA,__cfstring. +    // Moreover, it causes the linker to crash on OS X 10.7 +    if (Section.find("__DATA,__cfstring") == 0) { +      DEBUG(dbgs() << "Ignoring CFString: " << *G); +      return false; +    } +  } + +  return true; +} +  // This function replaces all global variables with new variables that have  // trailing redzones. It also creates a function that poisons  // redzones and inserts this function into llvm.global_ctors.  bool AddressSanitizer::insertGlobalRedzones(Module &M) {    SmallVector<GlobalVariable *, 16> GlobalsToChange; -  for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), -       E = M.getGlobalList().end(); G != E; ++G) { -    Type *Ty = cast<PointerType>(G->getType())->getElementType(); -    DEBUG(dbgs() << "GLOBAL: " << *G); - -    if (!Ty->isSized()) continue; -    if (!G->hasInitializer()) continue; -    // Touch only those globals that will not be defined in other modules. -    // Don't handle ODR type linkages since other modules may be built w/o asan. -    if (G->getLinkage() != GlobalVariable::ExternalLinkage && -        G->getLinkage() != GlobalVariable::PrivateLinkage && -        G->getLinkage() != GlobalVariable::InternalLinkage) -      continue; -    // Two problems with thread-locals: -    //   - The address of the main thread's copy can't be computed at link-time. 
-    //   - Need to poison all copies, not just the main thread's one. -    if (G->isThreadLocal()) -      continue; -    // For now, just ignore this Alloca if the alignment is large. -    if (G->getAlignment() > RedzoneSize) continue; - -    // Ignore all the globals with the names starting with "\01L_OBJC_". -    // Many of those are put into the .cstring section. The linker compresses -    // that section by removing the spare \0s after the string terminator, so -    // our redzones get broken. -    if ((G->getName().find("\01L_OBJC_") == 0) || -        (G->getName().find("\01l_OBJC_") == 0)) { -      DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); -      continue; -    } - -    if (G->hasSection()) { -      StringRef Section(G->getSection()); -      // Ignore the globals from the __OBJC section. The ObjC runtime assumes -      // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to -      // them. -      if ((Section.find("__OBJC,") == 0) || -          (Section.find("__DATA, __objc_") == 0)) { -        DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); -        continue; -      } -      // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 -      // Constant CFString instances are compiled in the following way: -      //  -- the string buffer is emitted into -      //     __TEXT,__cstring,cstring_literals -      //  -- the constant NSConstantString structure referencing that buffer -      //     is placed into __DATA,__cfstring -      // Therefore there's no point in placing redzones into __DATA,__cfstring. 
-      // Moreover, it causes the linker to crash on OS X 10.7 -      if (Section.find("__DATA,__cfstring") == 0) { -        DEBUG(dbgs() << "Ignoring CFString: " << *G); -        continue; -      } -    } - -    GlobalsToChange.push_back(G); +  for (Module::GlobalListType::iterator G = M.global_begin(), +       E = M.global_end(); G != E; ++G) { +    if (ShouldInstrumentGlobal(G)) +      GlobalsToChange.push_back(G);    }    size_t n = GlobalsToChange.size(); @@ -534,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {    //   size_t size;    //   size_t size_with_redzone;    //   const char *name; +  //   size_t has_dynamic_init;    // We initialize an array of such structures and pass it to a run-time call.    StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, -                                               IntptrTy, IntptrTy, NULL); -  SmallVector<Constant *, 16> Initializers(n); +                                               IntptrTy, IntptrTy, +                                               IntptrTy, NULL); +  SmallVector<Constant *, 16> Initializers(n), DynamicInit;    IRBuilder<> IRB(CtorInsertBefore); +  if (ClInitializers) +    FindDynamicInitializers(M); + +  // The addresses of the first and last dynamically initialized globals in +  // this TU.  Used in initialization order checking. +  Value *FirstDynamic = 0, *LastDynamic = 0; +    for (size_t i = 0; i < n; i++) {      GlobalVariable *G = GlobalsToChange[i];      PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -549,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {      uint64_t RightRedzoneSize = RedzoneSize +          (RedzoneSize - (SizeInBytes % RedzoneSize));      Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); +    // Determine whether this global should be poisoned in initialization. 
+    bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);      StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);      Constant *NewInitializer = ConstantStruct::get( @@ -583,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {          ConstantInt::get(IntptrTy, SizeInBytes),          ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),          ConstantExpr::getPointerCast(Name, IntptrTy), +        ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),          NULL); + +    // Populate the first and last globals declared in this TU. +    if (ClInitializers && GlobalHasDynamicInitializer) { +      LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); +      if (FirstDynamic == 0) +        FirstDynamic = LastDynamic; +    } +      DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);    } @@ -592,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {        M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,        ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); +  // Create calls for poisoning before initializers run and unpoisoning after. 
+  if (ClInitializers && FirstDynamic && LastDynamic) +    createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); +    Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( -      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); +      kAsanRegisterGlobalsName, IRB.getVoidTy(), +      IntptrTy, IntptrTy, NULL));    AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);    IRB.CreateCall2(AsanRegisterGlobals, diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll new file mode 100644 index 00000000000..472551654e5 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +@xxx = global i32 0, align 4 +; Clang will emit the following metadata identifying @xxx as dynamically +; initialized. +!0 = metadata !{i32* @xxx} +!llvm.asan.dynamically_initialized_globals = !{!0} + +define i32 @initializer() uwtable { +entry: +  ret i32 42 +} + +define internal void @__cxx_global_var_init() section ".text.startup" { +entry: +  %call = call i32 @initializer() +  store i32 %call, i32* @xxx, align 4 +  ret void +} + +define internal void @_GLOBAL__I_a() address_safety section ".text.startup" { +entry: +  call void @__cxx_global_var_init() +  ret void +} + +; Clang indicated that @xxx was dynamically initialized. 
+; __asan_{before,after}_dynamic_init should be called from _GLOBAL__I_a + +; CHECK: define internal void @_GLOBAL__I_a +; CHECK-NOT: ret +; CHECK: call void @__asan_before_dynamic_init +; CHECK: call void @__cxx_global_var_init +; CHECK: call void @__asan_after_dynamic_init +; CHECK: ret  | 

