From d0edb877c861c146e97745604a2d1933a4844130 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 24 Nov 2015 20:27:09 -0600 Subject: libc support for thread-local storage RTC: 124148 Change-Id: I055885bc7d7cfc4ebd7cf1a662f677bdf4e28c62 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/22313 Tested-by: Jenkins Server Reviewed-by: Richard J. Knight Reviewed-by: Brian Silver Reviewed-by: A. Patrick Williams III --- src/lib/makefile | 6 +- src/lib/syscall_task.C | 10 +- src/lib/tls.C | 334 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 346 insertions(+), 4 deletions(-) create mode 100644 src/lib/tls.C (limited to 'src/lib') diff --git a/src/lib/makefile b/src/lib/makefile index 505628150..47076f964 100644 --- a/src/lib/makefile +++ b/src/lib/makefile @@ -5,7 +5,9 @@ # # OpenPOWER HostBoot Project # -# COPYRIGHT International Business Machines Corp. 2010,2014 +# Contributors Listed Below - COPYRIGHT 2010,2015 +# [+] International Business Machines Corp. +# # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -45,6 +47,8 @@ OBJS += cxxtest_data.o OBJS += crc32.o OBJS += utilmisc.o +OBJS += tls.o + ifdef HOSTBOOT_MEMORY_LEAKS COMMONFLAGS += -DHOSTBOOT_MEMORY_LEAKS=1 endif diff --git a/src/lib/syscall_task.C b/src/lib/syscall_task.C index fe95a6cde..338a5f76a 100644 --- a/src/lib/syscall_task.C +++ b/src/lib/syscall_task.C @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2010,2014 */ +/* Contributors Listed Below - COPYRIGHT 2010,2015 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -47,14 +49,16 @@ tid_t task_create(void*(*fn)(void*), void* ptr) return (tid_t)(uint64_t) _syscall2(TASK_START, (void*)fn, ptr); } +extern "C" void task_end_stub(void*) NO_RETURN; + void task_end() { - _syscall1_nr(TASK_END, NULL); // no return. + task_end_stub(NULL); } void task_end2(void* retval) { - _syscall1_nr(TASK_END, retval); // no return. + task_end_stub(retval); } tid_t task_gettid() diff --git a/src/lib/tls.C b/src/lib/tls.C new file mode 100644 index 000000000..4fcafb23b --- /dev/null +++ b/src/lib/tls.C @@ -0,0 +1,334 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/lib/tls.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +#define assert crit_assert + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** Thread Local Storage - How it works. + * + * For background: + * - http://www.akkadia.org/drepper/tls.pdf + * - http://www.uclibc.org/docs/tls-ppc64.txt + * + * Normal global variables go into a per-module ELF section of .data or .bss + * depending on if the variable is non-zero or zero initialized respectively. + * To support TLS, the compiler emits a new .tdata and .tbss data. It is + * expected that the runtime support (this code) will create a new copy of + * the .tdata / .tbss section per-thread. + * + * The implementation of TLS needs to be both fast and use minimum memory. + * The TLS design (in tls.pdf and the compiler implementation) allows lazy + * creation of the TLS data on a per-thread and per-module basis and it is + * also organized in a way to allow TLS variable lookups to typically be done + * with only a few pointer dereferences. + * + * When you create a thread local variable, such as through the thread_local + * C++11 syntax, the compiler will create a tuple for each variable. Each + * module is assigned a module-id by the linker and the tuple has the + * module-id and the offset in the .tdata / .tbss section for each variable. + * In the case where a TLS variable from a module has already been accessed, + * TLS access is as simple as: task_t->tls_context->blobs[module][offset]. + * When a variable has not yet been accessed by a thread, the blob for the + * module must be allocated and initialized from the module's original + * .tdata / .tbss section. + * + * One oddity in the design is the disconnect between the tuples and the + * .tdata section. The module_init code can find if a .tdata exists, by + * checking the size of __tls_start_addr and __tls_end_addr, but it does not + * know the module-id that was assigned by the linker. Therefore, we have + * module_init 'register' the tls_start/tls_end when the module loads, but + * have to defer determining the module-id until the first TLS access of + * a variable (in any thread). At this point, we find the .tdata address + * closest to the variable to match up the module-id and .tdata address. This + * is the purpose of the __tls_pending_modules and __tls_modules structures. + * Once a module has been matched up once we can use the __tls_modules for + * quicker lookups of a TLS access in any other thread. + */ + + +/** Tuple created by the linker for each TLS variable. */ +struct __tls_linker_tuple +{ + size_t module; + size_t offset; +}; + +/** Info about the .tdata section for each module. */ +struct __tls_module +{ + void* sect_addr; + size_t size; +}; + +/** TLS destructor data. */ +struct __tls_dtor +{ + void (*dtor)(void*); + void* arg; + + __tls_dtor* next; +}; + +/** TLS data for each task to go into task_t. */ +struct __tls_thread_info +{ + size_t count; + Util::Lockfree::Stack<__tls_dtor> dtors; + void* blobs[0]; +}; + +mutex_t __tls_mutex = MUTEX_INITIALIZER; +std::vector<__tls_module> __tls_modules; +std::vector<__tls_module> __tls_pending_modules; + +/** Get the previously registered __tls_module data for a TLS variable */ +const __tls_module* __tls_get_module(const __tls_linker_tuple* tuple) +{ + // Look the module up in the __tls_modules first, in case we've seen + // this module before in another thread. + if ((__tls_modules.size() > tuple->module) && + (__tls_modules[tuple->module].sect_addr != nullptr)) + { + return &__tls_modules[tuple->module]; + } + + // We plan to insert a new module, so make sure we can contain it. + if (__tls_modules.size() <= tuple->module) + { + __tls_modules.resize(tuple->module+1); + } + + // This is the first time we've seen this module. Need to look up in + // pending. + + // The TLS sections are at the beginning of the .rodata section and the + // linker tuples are somewhere in .data. Therefore sect_addr < tuple. + // Search __tls_pending_modules for the highest address section that is + // less than the tuple address. + auto best = __tls_pending_modules.begin(); + auto curr = best; + while(curr != __tls_pending_modules.end()) + { + if ((curr->sect_addr > best->sect_addr) && + (curr->sect_addr < tuple)) + { + best = curr; + } + ++curr; + } + assert(best != __tls_pending_modules.end()); + + // Copy it into the __tls_modules and remove it from the pending list. + __tls_modules[tuple->module] = *best; + __tls_pending_modules.erase(best); + + return &__tls_modules[tuple->module]; +} + +/* Since Hostboot runtime only has a single thread, we'll just create a + * single global TLS area. */ +#ifdef __HOSTBOOT_RUNTIME +task_t __tls_task_struct; +#endif + +/** Get a TLS variable address + * + * Calls to this automatically inserted by the compiler. + */ +extern "C" +void* __tls_get_addr(const __tls_linker_tuple* tuple) +{ + task_t* task = nullptr; +#ifdef __HOSTBOOT_RUNTIME + task = &tls_task_struct; +#else + // Get the task_t pointer from register 13. + asm volatile("mr %0, 13" : "=r"(task)); +#endif + + auto tls_info = reinterpret_cast<__tls_thread_info*>(task->tls_context); + + // If: + // - tls_info is nullptr. + // - tls count is not at least as large as this module id. + // - tls[module] is nullptr + // Then: module blob needs to be allocated. + if ((tls_info == nullptr) || + (tls_info->count <= tuple->module) || + (tls_info->blobs[tuple->module] == nullptr)) + { + + // If there isn't room for the module's blob, we need to allocate it. + if ((tls_info == nullptr) || (tls_info->count <= tuple->module)) + { + decltype(__tls_thread_info::count) old_size = 0; + auto new_size = sizeof(__tls_thread_info) + + sizeof(void*)*(tuple->module+1); + + // Allocate or reallocate the tls info. + if (tls_info == nullptr) + { + old_size = 0; + tls_info = reinterpret_cast( + malloc(new_size)); + memset(&tls_info->dtors, '\0', sizeof(tls_info->dtors)); + } + else + { + old_size = tls_info->count; + tls_info = reinterpret_cast( + realloc(tls_info, new_size)); + } + + // Clear the newly allocated area and update the count. + memset(&tls_info->blobs[old_size], '\0', new_size - + (sizeof(__tls_thread_info) + sizeof(void*)*old_size)); + tls_info->count = tuple->module+1; + + // save into task struct. + task->tls_context = tls_info; + + } + + // Allocate and copy TLS blob. + mutex_lock(&__tls_mutex); + { + auto module = __tls_get_module(tuple); + auto blob = tls_info->blobs[tuple->module] = malloc(module->size); + memcpy(blob, module->sect_addr, module->size); + } + mutex_unlock(&__tls_mutex); + + } + + // Return the offset of the TLS variable from this module's blob. + return &reinterpret_cast(tls_info->blobs[tuple->module]) + [tuple->offset+VFS_PPC64_DTPREL_OFFSET]; +} + +/** Register a module's __tls_start_address / __tls_end_address. + * + * Called by init() in module_init. + */ +void __tls_register(void* s, void* e) +{ + if (s == e) + return; + + __tls_module m = { s, ((size_t)e) - ((size_t)s) }; + + mutex_lock(&__tls_mutex); + { + __tls_pending_modules.push_back(m); + } + mutex_unlock(&__tls_mutex); +} + +/** Clean up registration on unload. + * + * Called by fini() in module_init. + */ +void __tls_unregister(void* s, void* e) +{ + if (s == e) + return; + + using std::remove_if; + + mutex_lock(&__tls_mutex); + { + auto& v = __tls_pending_modules; + v.erase(remove_if(v.begin(), v.end(), + [s](const auto& i){ return i.sect_addr == s; }), + v.end()); + } + mutex_unlock(&__tls_mutex); +} + +/** Clean up TLS data for a task. + * + * Called by task_end_stub. + */ +extern "C" +void __tls_cleanup(__tls_thread_info* info) +{ + // Call TLS destructors. + while(auto d = info->dtors.pop()) + { + d->dtor(d->arg); + free(d); + } + + // Free TLS blobs. + decltype(__tls_thread_info::count) i = 0; + while(i < info->count) + { + free(info->blobs[i]); + ++i; + } + free(info); +} + +/** Register a C++ dtor for TLS data. + * + * Automatically called by compiler when a TLS variable has a constructor / + * destructor. + */ +extern "C" +int __cxa_thread_atexit(void (*dtor)(void*), void* arg, void* dso) +{ + task_t* task = nullptr; +#ifdef __HOSTBOOT_RUNTIME + task = &tls_task_struct; +#else + // Get the task_t pointer from register 13. + asm volatile("mr %0, 13" : "=r"(task)); +#endif + + // Get tls_info from task_t or allocate a new one. + auto tls_info = reinterpret_cast<__tls_thread_info*>(task->tls_context); + if (nullptr == tls_info) + { + task->tls_context = tls_info = + reinterpret_cast( + calloc(1, sizeof(decltype(*tls_info)))); + } + + // Insert a new dtor registration. + auto dtor_info = reinterpret_cast<__tls_dtor*>(malloc(sizeof(__tls_dtor))); + dtor_info->dtor = dtor; + dtor_info->arg = arg; + tls_info->dtors.push(dtor_info); + + return 0; +} -- cgit v1.2.1