diff options
Diffstat (limited to 'compiler-rt/lib')
| -rw-r--r-- | compiler-rt/lib/msan/msan.cc | 9 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/msan.h | 7 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/msan_interceptors.cc | 20 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/msan_interface_internal.h | 4 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/msan_linux.cc | 39 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/tests/CMakeLists.txt | 17 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/tests/msan_loadable.cc | 45 | ||||
| -rw-r--r-- | compiler-rt/lib/msan/tests/msan_test.cc | 49 | ||||
| -rw-r--r-- | compiler-rt/lib/msandr/msandr.cc | 91 | 
9 files changed, 262 insertions, 19 deletions
diff --git a/compiler-rt/lib/msan/msan.cc b/compiler-rt/lib/msan/msan.cc index cfa0741828e..96f99d4a8e4 100644 --- a/compiler-rt/lib/msan/msan.cc +++ b/compiler-rt/lib/msan/msan.cc @@ -59,6 +59,7 @@ static THREADLOCAL struct {  } __msan_stack_bounds;  static THREADLOCAL bool is_in_symbolizer; +static THREADLOCAL bool is_in_loader;  extern "C" const int __msan_track_origins;  int __msan_get_track_origins() { @@ -87,6 +88,14 @@ void EnterSymbolizer() { is_in_symbolizer = true; }  void ExitSymbolizer()  { is_in_symbolizer = false; }  bool IsInSymbolizer() { return is_in_symbolizer; } +void EnterLoader() { is_in_loader = true; } +void ExitLoader()  { is_in_loader = false; } + +extern "C" { +SANITIZER_INTERFACE_ATTRIBUTE +bool __msan_is_in_loader() { return is_in_loader; } +} +  static Flags msan_flags;  Flags *flags() { diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index fae1ad58a6d..123dd36bf35 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -26,6 +26,8 @@  #define MEM_IS_SHADOW(mem) ((uptr)mem >=         0x200000000000ULL && \                              (uptr)mem <=         0x400000000000ULL) +struct link_map;  // Opaque type returned by dlopen(). 
+  const int kMsanParamTlsSizeInWords = 100;  const int kMsanRetvalTlsSizeInWords = 100; @@ -55,6 +57,9 @@ struct SymbolizerScope {    ~SymbolizerScope() { ExitSymbolizer(); }  }; +void EnterLoader(); +void ExitLoader(); +  void MsanDie();  void PrintWarning(uptr pc, uptr bp);  void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin); @@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 origin);  void ReportExpectedUMRNotFound(StackTrace *stack);  void ReportAtExitStatistics(); +void UnpoisonMappedDSO(struct link_map *map); +  #define GET_MALLOC_STACK_TRACE                                     \    StackTrace stack;                                                \    stack.size = 0;                                                  \ diff --git a/compiler-rt/lib/msan/msan_interceptors.cc b/compiler-rt/lib/msan/msan_interceptors.cc index a6f25e8aa04..f81c8de89ef 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cc +++ b/compiler-rt/lib/msan/msan_interceptors.cc @@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {    return res;  } +// dlopen() ultimately calls mmap() down inside the loader, which generally +// doesn't participate in dynamic symbol resolution.  Therefore we won't +// intercept its calls to mmap, and we have to hook it here.  The loader +// initializes the module before returning, so without the dynamic component, we +// won't be able to clear the shadow before the initializers.  Fixing this would +// require putting our own initializer first to clear the shadow. +INTERCEPTOR(void *, dlopen, const char *filename, int flag) { +  ENSURE_MSAN_INITED(); +  EnterLoader(); +  link_map *map = (link_map *)REAL(dlopen)(filename, flag); +  ExitLoader(); +  if (!__msan_has_dynamic_component()) { +    // If msandr didn't clear the shadow before the initializers ran, we do it +    // ourselves afterwards. 
+    UnpoisonMappedDSO(map); +  } +  return (void *)map; +} +  INTERCEPTOR(int, getrusage, int who, void *usage) {    ENSURE_MSAN_INITED();    int res = REAL(getrusage)(who, usage); @@ -973,6 +992,7 @@ void InitializeInterceptors() {    INTERCEPT_FUNCTION(recvfrom);    INTERCEPT_FUNCTION(recvmsg);    INTERCEPT_FUNCTION(dladdr); +  INTERCEPT_FUNCTION(dlopen);    INTERCEPT_FUNCTION(getrusage);    inited = 1;  } diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index 905c5b79398..e1cd13c3f73 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();  SANITIZER_INTERFACE_ATTRIBUTE  int __msan_get_param_tls_offset(); +// For intercepting mmap from ld.so in msandr. +SANITIZER_INTERFACE_ATTRIBUTE +bool __msan_is_in_loader(); +  // For testing.  SANITIZER_INTERFACE_ATTRIBUTE  u32 __msan_get_umr_origin(); diff --git a/compiler-rt/lib/msan/msan_linux.cc b/compiler-rt/lib/msan/msan_linux.cc index 0b08b7defe9..64aa35b0b35 100644 --- a/compiler-rt/lib/msan/msan_linux.cc +++ b/compiler-rt/lib/msan/msan_linux.cc @@ -16,6 +16,9 @@  #include "msan.h" +#include <algorithm> +#include <elf.h> +#include <link.h>  #include <stdio.h>  #include <stdlib.h>  #include <signal.h> @@ -87,6 +90,42 @@ static void MsanAtExit(void) {  void InstallAtExitHandler() {    atexit(MsanAtExit);  } + +void UnpoisonMappedDSO(link_map *map) { +  typedef ElfW(Phdr) Elf_Phdr; +  typedef ElfW(Ehdr) Elf_Ehdr; +  char *base = (char *)map->l_addr; +  Elf_Ehdr *ehdr = (Elf_Ehdr *)base; +  char *phdrs = base + ehdr->e_phoff; +  char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize; + +  // Find the segment with the minimum base so we can "relocate" the p_vaddr +  // fields.  Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC +  // objects have a non-zero base. 
+  uptr preferred_base = ~0ULL; +  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { +    Elf_Phdr *phdr = (Elf_Phdr *)iter; +    if (phdr->p_type == PT_LOAD) +      preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr); +  } + +  // Compute the delta from the real base to get a relocation delta. +  ptrdiff_t delta = (uptr)base - preferred_base; +  // Now we can figure out what the loader really mapped. +  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { +    Elf_Phdr *phdr = (Elf_Phdr *)iter; +    if (phdr->p_type == PT_LOAD) { +      uptr seg_start = phdr->p_vaddr + delta; +      uptr seg_end = seg_start + phdr->p_memsz; +      // None of these values are aligned.  We consider the ragged edges of the +      // load command as defined, since they are mapped from the file. +      seg_start = RoundDownTo(seg_start, GetPageSizeCached()); +      seg_end = RoundUpTo(seg_end, GetPageSizeCached()); +      __msan_unpoison((void *)seg_start, seg_end - seg_start); +    } +  } +} +  }  // namespace __msan  #endif  // __linux__ diff --git a/compiler-rt/lib/msan/tests/CMakeLists.txt b/compiler-rt/lib/msan/tests/CMakeLists.txt index 7067c45785c..813aad02a25 100644 --- a/compiler-rt/lib/msan/tests/CMakeLists.txt +++ b/compiler-rt/lib/msan/tests/CMakeLists.txt @@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS  # Unittest sources and build flags.  set(MSAN_UNITTEST_SOURCE msan_test.cc) +set(MSAN_LOADABLE_SOURCE msan_loadable.cc)  set(MSAN_UNITTEST_HEADERS    msandr_test_so.h    ../../../include/sanitizer/msan_interface.h @@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS    # FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.    -lstdc++  ) +set(MSAN_LOADABLE_LINK_FLAGS +  -fsanitize=memory +  -shared +)  # Compile source for the given architecture, using compiler  # options in ${ARGN}, and add it to the object list. 
@@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name arch)    add_compiler_rt_test(${test_suite} ${test_name}                         OBJECTS ${ARGN}                         DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN} +                            ${MSAN_LOADABLE_SO}                         LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}                                    ${TARGET_LINK_FLAGS}                                    "-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}") @@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)    msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}                 ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS}) +  # Instrumented loadable module objects. +  set(MSAN_INST_LOADABLE_OBJECTS) +  msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch} +               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS}) +    # Uninstrumented shared object for MSanDR tests.    set(MSANDR_TEST_OBJECTS)    msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}                 ${MSAN_UNITTEST_COMMON_CFLAGS}) +  # Instrumented loadable library tests. +  set(MSAN_LOADABLE_SO) +  msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch} +                   OBJECTS ${MSAN_INST_LOADABLE_OBJECTS} +                   DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES}) +    # Uninstrumented shared library tests.    set(MSANDR_TEST_SO)    msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch} diff --git a/compiler-rt/lib/msan/tests/msan_loadable.cc b/compiler-rt/lib/msan/tests/msan_loadable.cc new file mode 100644 index 00000000000..db3bf489853 --- /dev/null +++ b/compiler-rt/lib/msan/tests/msan_loadable.cc @@ -0,0 +1,45 @@ +//===-- msan_loadable.cc --------------------------------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemorySanitizer. +// +// MemorySanitizer unit tests. +//===----------------------------------------------------------------------===// + +#include "msan/msan_interface_internal.h" +#include <stdlib.h> + +static void *dso_global; + +// No name mangling. +extern "C" { + +__attribute__((constructor)) +void loadable_module_init(void) { +  if (!__msan_has_dynamic_component()) +    return; +  // The real test is that this compare should not make an uninit. +  if (dso_global == NULL) +    dso_global = malloc(4); +} + +__attribute__((destructor)) +void loadable_module_fini(void) { +  if (!__msan_has_dynamic_component()) +    return; +  free(dso_global); +  // *Don't* overwrite it with NULL!  That would unpoison it, but our test +  // relies on reloading at the same address and keeping the poison. +} + +void **get_dso_global() { +  return &dso_global; +} + +} diff --git a/compiler-rt/lib/msan/tests/msan_test.cc b/compiler-rt/lib/msan/tests/msan_test.cc index b30a8dffd79..c1040d5c2b9 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cc +++ b/compiler-rt/lib/msan/tests/msan_test.cc @@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {    EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);  } +#ifdef __GLIBC__ +extern "C" { +  extern void *__libc_stack_end; +} + +static char **GetArgv(void) { +  uintptr_t *stack_end = (uintptr_t *)__libc_stack_end; +  return (char**)(stack_end + 1); +} + +#else  // __GLIBC__ +# error "TODO: port this" +#endif + +TEST(MemorySanitizer, dlopen) { +  // Compute the path to our loadable DSO.  We assume it's in the same +  // directory.  Only use string routines that we intercept so far to do this. 
+  char **argv = GetArgv(); +  const char *basename = "libmsan_loadable.x86_64.so"; +  size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1; +  char *path = new char[path_max]; +  char *last_slash = strrchr(argv[0], '/'); +  assert(last_slash); +  snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]), +           argv[0], basename); + +  // We need to clear shadow for globals when doing dlopen.  In order to test +  // this, we have to poison the shadow for the DSO before we load it.  In +  // general this is difficult, but the loader tends to reload things in the +  // same place, so we open, close, and then reopen.  The global should always +  // start out clean after dlopen. +  for (int i = 0; i < 2; i++) { +    void *lib = dlopen(path, RTLD_LAZY); +    if (lib == NULL) { +      printf("dlerror: %s\n", dlerror()); +      assert(lib != NULL); +    } +    void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global"); +    assert(get_dso_global); +    void **dso_global = get_dso_global(); +    EXPECT_NOT_POISONED(*dso_global); +    __msan_poison(dso_global, sizeof(*dso_global)); +    EXPECT_POISONED(*dso_global); +    dlclose(lib); +  } + +  delete[] path; +} +  TEST(MemorySanitizer, scanf) {    const char *input = "42 hello";    int* d = new int; diff --git a/compiler-rt/lib/msandr/msandr.cc b/compiler-rt/lib/msandr/msandr.cc index 235a1eddd8e..fee9834ded2 100644 --- a/compiler-rt/lib/msandr/msandr.cc +++ b/compiler-rt/lib/msandr/msandr.cc @@ -37,6 +37,7 @@  #include <drsyscall.h>  #include <sys/mman.h> +#include <sys/syscall.h>  /* for SYS_mmap */  #include <algorithm>  #include <string> @@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data_t *info)  int(*__msan_get_retval_tls_offset)();  int(*__msan_get_param_tls_offset)(); +void (*__msan_unpoison)(void *base, size_t size); +bool (*__msan_is_in_loader)(); + +static generic_func_t LookupCallback(module_data_t *app, const char *name) { +  generic_func_t callback = 
dr_get_proc_address(app->handle, name); +  if (callback == NULL) { +    dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); +    CHECK(callback); +  } +  return callback; +}  void InitializeMSanCallbacks() {    module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); @@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {    }    g_app_path = app->full_path; -  const char *callback_name = "__msan_get_retval_tls_offset"; -  __msan_get_retval_tls_offset = -      (int(*)()) dr_get_proc_address(app->handle, callback_name); -  if (__msan_get_retval_tls_offset == NULL) { -    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path); -    CHECK(__msan_get_retval_tls_offset); -  } +  __msan_get_retval_tls_offset = (int (*)()) +      LookupCallback(app, "__msan_get_retval_tls_offset"); +  __msan_get_param_tls_offset = (int (*)()) +      LookupCallback(app, "__msan_get_param_tls_offset"); +  __msan_unpoison = (void(*)(void *, size_t)) +      LookupCallback(app, "__msan_unpoison"); +  __msan_is_in_loader = (bool (*)()) +      LookupCallback(app, "__msan_is_in_loader"); -  callback_name = "__msan_get_param_tls_offset"; -  __msan_get_param_tls_offset = -      (int(*)()) dr_get_proc_address(app->handle, callback_name); -  if (__msan_get_param_tls_offset == NULL) { -    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path); -    CHECK(__msan_get_param_tls_offset); -  } +  dr_free_module_data(app);  } -#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL) -  // FIXME: Handle absolute addresses and PC-relative addresses.  // FIXME: Handle TLS accesses via FS or GS.  DR assumes all other segments have  // a zero base anyway. 
@@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {    if (arg->pre)      return true; -  if (arg->mode != DRSYS_PARAM_OUT) +  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))      return true;    size_t sz = arg->size; @@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {                (unsigned long long)(sz & 0xFFFFFFFF));    } -  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr); -  memset(p, 0, sz); +  if (VERBOSITY > 0) { +    drmf_status_t res; +    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; +    const char *name; +    res = drsys_syscall_name(syscall, &name); +    dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", +              name, arg->ordinal, arg->start_addr, +              (char *)arg->start_addr + sz); +  } + +  // We don't switch to the app context because __msan_unpoison() doesn't need +  // TLS segments. +  __msan_unpoison(arg->start_addr, sz);    return true; /* keep going */  } @@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext, int sysnum) {    return true;  } +static bool IsInLoader(void *drcontext) { +  // TODO: This segment swap is inefficient.  DR should just let us query the +  // app segment base, which it has.  Alternatively, if we disable +  // -mangle_app_seg, then we won't need the swap. +  bool need_swap = !dr_using_app_state(drcontext); +  if (need_swap) +    dr_switch_to_app_state(drcontext); +  bool is_in_loader = __msan_is_in_loader(); +  if (need_swap) +    dr_switch_to_dr_state(drcontext); +  return is_in_loader; +} +  void event_post_syscall(void *drcontext, int sysnum) {    drsys_syscall_t *syscall;    drsys_sysnum_t sysnum_full; @@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext, int sysnum) {          drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);      CHECK(res == DRMF_SUCCESS);    } + +  // Our normal mmap interceptor can't intercept calls from the loader itself. 
// This means we don't clear the shadow for calls to dlopen.  For now, we +  // solve this by intercepting mmap from ld.so here, but ideally we'd have a +  // solution that doesn't rely on msandr. +  // +  // Be careful not to intercept maps done by the msan rtl.  Otherwise we end up +  // unpoisoning vast regions of memory and OOMing. +  // TODO: __msan_unpoison() could "flush" large regions of memory like tsan +  // does instead of doing a large memset.  However, we need the memory to be +  // zeroed, whereas tsan does not, so plain madvise is not enough. +  if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { +    if (IsInLoader(drcontext)) { +      app_pc base = (app_pc)dr_syscall_get_result(drcontext); +      ptr_uint_t size; +      drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); +      CHECK(res == DRMF_SUCCESS); +      if (VERBOSITY > 0) +        dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); +      // We don't switch to the app context because __msan_unpoison() doesn't +      // need TLS segments. +      __msan_unpoison(base, size); +    } +  }  }  } // namespace  | 

