From d0edb877c861c146e97745604a2d1933a4844130 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 24 Nov 2015 20:27:09 -0600 Subject: libc support for thread-local storage RTC: 124148 Change-Id: I055885bc7d7cfc4ebd7cf1a662f677bdf4e28c62 Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/22313 Tested-by: Jenkins Server Reviewed-by: Richard J. Knight Reviewed-by: Brian Silver Reviewed-by: A. Patrick Williams III --- src/build/debug/Hostboot/Ps.pm | 2 +- src/build/linker/linker.C | 2 +- src/include/kernel/ppcconsts.S | 6 +- src/include/kernel/task.H | 6 +- src/include/sys/vfs.h | 4 + src/kernel/start.S | 22 ++- src/kernel/taskmgr.C | 7 +- src/lib/makefile | 6 +- src/lib/syscall_task.C | 10 +- src/lib/tls.C | 334 +++++++++++++++++++++++++++++++++++++++++ src/makefile | 1 + src/usr/module_init.C | 15 +- src/usr/testcore/lib/tls.H | 75 +++++++++ 13 files changed, 478 insertions(+), 12 deletions(-) create mode 100644 src/lib/tls.C create mode 100644 src/usr/testcore/lib/tls.H diff --git a/src/build/debug/Hostboot/Ps.pm b/src/build/debug/Hostboot/Ps.pm index 910d122d8..40aff5ccf 100755 --- a/src/build/debug/Hostboot/Ps.pm +++ b/src/build/debug/Hostboot/Ps.pm @@ -50,7 +50,7 @@ use constant PS_TRACKER_RETVAL_OFFSET => 8 + PS_TRACKER_STATUS_OFFSET; use constant PS_TRACKER_WAITINFO_OFFSET => 8 + PS_TRACKER_RETVAL_OFFSET; use constant PS_TRACKER_ENTRYPOINT_OFFSET => 8 + PS_TRACKER_WAITINFO_OFFSET; -use constant PS_TASK_STATE_OFFSET => 8*43; +use constant PS_TASK_STATE_OFFSET => 8*44; use constant PS_TASK_STATEEXTRA_OFFSET => 8 + PS_TASK_STATE_OFFSET; use bigint; diff --git a/src/build/linker/linker.C b/src/build/linker/linker.C index 167da1858..fb154d989 100644 --- a/src/build/linker/linker.C +++ b/src/build/linker/linker.C @@ -845,7 +845,7 @@ bool Object::perform_local_relocations() else if (i->type & Symbol::TLS_OFFSET) { // Set value to TLS offset. 
- address = i->addend; + address = i->addend - VFS_PPC64_DTPREL_OFFSET; needs_relocation = false; relocation = address; } diff --git a/src/include/kernel/ppcconsts.S b/src/include/kernel/ppcconsts.S index b58d41e65..6da85a9b2 100644 --- a/src/include/kernel/ppcconsts.S +++ b/src/include/kernel/ppcconsts.S @@ -5,7 +5,9 @@ # # OpenPOWER HostBoot Project # -# COPYRIGHT International Business Machines Corp. 2010,2014 +# Contributors Listed Below - COPYRIGHT 2010,2015 +# [+] International Business Machines Corp. +# # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -245,6 +247,8 @@ .set TASK_FPR_31, TASK_FPR_30+8 .set TASK_FPSCR, TASK_FPR_31+8 +.set TASK_TLS_CONTEXT, TASK_FP_CONTEXT+8 + .set CPU_KERNEL_STACK, 0 .set CPU_KERNEL_STACK_BOTTOM, CPU_KERNEL_STACK+8 .set CPU_CPUID, CPU_KERNEL_STACK_BOTTOM+8 diff --git a/src/include/kernel/task.H b/src/include/kernel/task.H index 47f143d77..0981dc3ab 100644 --- a/src/include/kernel/task.H +++ b/src/include/kernel/task.H @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2010,2014 */ +/* Contributors Listed Below - COPYRIGHT 2010,2015 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -93,6 +95,8 @@ struct task_t context_t context; /** Pointer to optional floating point context. */ context_fp_t* fp_context; + /** Thread-local-storage area for userspace. */ + void* tls_context; /** Task ID */ tid_t tid; diff --git a/src/include/sys/vfs.h b/src/include/sys/vfs.h index 0d496243f..7bbe73e30 100644 --- a/src/include/sys/vfs.h +++ b/src/include/sys/vfs.h @@ -97,6 +97,10 @@ extern VfsSystemModule VFS_MODULES[VFS_MODULE_MAX]; extern uint64_t VFS_LAST_ADDRESS; +// Offset for TLS "dtv-relative displacement". 
+// See http://www.uclibc.org/docs/tls-ppc64.txt +#define VFS_PPC64_DTPREL_OFFSET 0x8000 + #ifdef __cplusplus } #endif diff --git a/src/kernel/start.S b/src/kernel/start.S index 6474a2ced..13351b893 100644 --- a/src/kernel/start.S +++ b/src/kernel/start.S @@ -5,7 +5,7 @@ # # OpenPOWER HostBoot Project # -# Contributors Listed Below - COPYRIGHT 2010,2014 +# Contributors Listed Below - COPYRIGHT 2010,2015 # [+] International Business Machines Corp. # # @@ -817,6 +817,26 @@ userspace_task_entry: ;// inserts garbage code into the task_end C function. .global task_end_stub task_end_stub: + // Check for a NULL stack pointer and skip TLS cleanup. + cmpi cr0, r1, 0 + beq 1f + // Check for a NULL TLS-context and skip TLS cleanup. + ld r0, TASK_TLS_CONTEXT(r13) + cmpi cr0, r0, 0 + beq 1f + // Save off r3. + mr r31, r3 + // Set up TOC for __tls_cleanup + lis r2, __tls_cleanup@h + ori r2, r2, __tls_cleanup@l + ld r2, 8(r2) + // Call __tls_cleanup + mr r3, r0 + bl __tls_cleanup + // Restore r3. + mr r3, r31 +1: + // Call task-end syscall. mr r4, r3 ;// Move current rc (r3) to status value (r4) li r3, 2 ;// TASK_END -> r3 (syscall number) sc diff --git a/src/kernel/taskmgr.C b/src/kernel/taskmgr.C index cb7e335dc..03b3d0073 100644 --- a/src/kernel/taskmgr.C +++ b/src/kernel/taskmgr.C @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2010,2014 */ +/* Contributors Listed Below - COPYRIGHT 2010,2015 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -116,13 +116,16 @@ task_t* TaskManager::_createTask(TaskManager::task_fn_t t, } else { - task->context.stack_ptr = NULL; + task->context.stack_ptr = NULL; task->context.gprs[1] = NULL; } // Clear FP context (start with FP disabled on all tasks). task->fp_context = NULL; + // Clear out the TLS context. + task->tls_context = NULL; + // Clear task state info. 
task->state = TASK_STATE_READY; task->state_info = NULL; diff --git a/src/lib/makefile b/src/lib/makefile index 505628150..47076f964 100644 --- a/src/lib/makefile +++ b/src/lib/makefile @@ -5,7 +5,9 @@ # # OpenPOWER HostBoot Project # -# COPYRIGHT International Business Machines Corp. 2010,2014 +# Contributors Listed Below - COPYRIGHT 2010,2015 +# [+] International Business Machines Corp. +# # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -45,6 +47,8 @@ OBJS += cxxtest_data.o OBJS += crc32.o OBJS += utilmisc.o +OBJS += tls.o + ifdef HOSTBOOT_MEMORY_LEAKS COMMONFLAGS += -DHOSTBOOT_MEMORY_LEAKS=1 endif diff --git a/src/lib/syscall_task.C b/src/lib/syscall_task.C index fe95a6cde..338a5f76a 100644 --- a/src/lib/syscall_task.C +++ b/src/lib/syscall_task.C @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2010,2014 */ +/* Contributors Listed Below - COPYRIGHT 2010,2015 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -47,14 +49,16 @@ tid_t task_create(void*(*fn)(void*), void* ptr) return (tid_t)(uint64_t) _syscall2(TASK_START, (void*)fn, ptr); } +extern "C" void task_end_stub(void*) NO_RETURN; + void task_end() { - _syscall1_nr(TASK_END, NULL); // no return. + task_end_stub(NULL); } void task_end2(void* retval) { - _syscall1_nr(TASK_END, retval); // no return. + task_end_stub(retval); } tid_t task_gettid() diff --git a/src/lib/tls.C b/src/lib/tls.C new file mode 100644 index 000000000..4fcafb23b --- /dev/null +++ b/src/lib/tls.C @@ -0,0 +1,334 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. 
*/ +/* */ +/* $Source: src/lib/tls.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +#define assert crit_assert + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** Thread Local Storage - How it works. + * + * For background: + * - http://www.akkadia.org/drepper/tls.pdf + * - http://www.uclibc.org/docs/tls-ppc64.txt + * + * Normal global variables go into a per-module ELF section of .data or .bss + * depending on if the variable is non-zero or zero initialized respectively. + * To support TLS, the compiler emits a new .tdata and .tbss data. It is + * expected that the runtime support (this code) will create a new copy of + * the .tdata / .tbss section per-thread. + * + * The implementation of TLS needs to be both fast and use minimum memory. + * The TLS design (in tls.pdf and the compiler implementation) allows lazy + * creation of the TLS data on a per-thread and per-module basis and it is + * also organized in a way to allow TLS variable lookups to typically be done + * with only a few pointer dereferences. + * + * When you create a thread local variable, such as through the thread_local + * C++11 syntax, the compiler will create a tuple for each variable. 
Each + * module is assigned a module-id by the linker and the tuple has the + * module-id and the offset in the .tdata / .tbss section for each variable. + * In the case where a TLS variable from a module has already been accessed, + * TLS access is as simple as: task_t->tls_context->blobs[module][offset]. + * When a variable has not yet been accessed by a thread, the blob for the + * module must be allocated and initialized from the module's original + * .tdata / .tbss section. + * + * One oddity in the design is the disconnect between the tuples and the + * .tdata section. The module_init code can find if a .tdata exists, by + * checking the size of __tls_start_addr and __tls_end_addr, but it does not + * know the module-id that was assigned by the linker. Therefore, we have + * module_init 'register' the tls_start/tls_end when the module loads, but + * have to defer determining the module-id until the first TLS access of + * a variable (in any thread). At this point, we find the .tdata address + * closest to the variable to match up the module-id and .tdata address. This + * is the purpose of the __tls_pending_modules and __tls_modules structures. + * Once a module has been matched up once we can use the __tls_modules for + * quicker lookups of a TLS access in any other thread. + */ + + +/** Tuple created by the linker for each TLS variable. */ +struct __tls_linker_tuple +{ + size_t module; + size_t offset; +}; + +/** Info about the .tdata section for each module. */ +struct __tls_module +{ + void* sect_addr; + size_t size; +}; + +/** TLS destructor data. */ +struct __tls_dtor +{ + void (*dtor)(void*); + void* arg; + + __tls_dtor* next; +}; + +/** TLS data for each task to go into task_t. 
*/ +struct __tls_thread_info +{ + size_t count; + Util::Lockfree::Stack<__tls_dtor> dtors; + void* blobs[0]; +}; + +mutex_t __tls_mutex = MUTEX_INITIALIZER; +std::vector<__tls_module> __tls_modules; +std::vector<__tls_module> __tls_pending_modules; + +/** Get the previously registered __tls_module data for a TLS variable */ +const __tls_module* __tls_get_module(const __tls_linker_tuple* tuple) +{ + // Look the module up in the __tls_modules first, in case we've seen + // this module before in another thread. + if ((__tls_modules.size() > tuple->module) && + (__tls_modules[tuple->module].sect_addr != nullptr)) + { + return &__tls_modules[tuple->module]; + } + + // We plan to insert a new module, so make sure we can contain it. + if (__tls_modules.size() <= tuple->module) + { + __tls_modules.resize(tuple->module+1); + } + + // This is the first time we've seen this module. Need to look up in + // pending. + + // The TLS sections are at the beginning of the .rodata section and the + // linker tuples are somewhere in .data. Therefore sect_addr < tuple. + // Pick the highest-addressed pending section below the tuple; 'best' + // starts at end() so a section above the tuple is never selected. + auto best = __tls_pending_modules.end(); + for (auto curr = __tls_pending_modules.begin(); + curr != __tls_pending_modules.end(); ++curr) + { + if ((curr->sect_addr < tuple) && + ((best == __tls_pending_modules.end()) || + (curr->sect_addr > best->sect_addr))) + { + best = curr; + } + } + assert(best != __tls_pending_modules.end()); + + // Copy it into the __tls_modules and remove it from the pending list. + __tls_modules[tuple->module] = *best; + __tls_pending_modules.erase(best); + + return &__tls_modules[tuple->module]; +} + +/* Since Hostboot runtime only has a single thread, we'll just create a + * single global TLS area. */ +#ifdef __HOSTBOOT_RUNTIME +task_t __tls_task_struct; +#endif + +/** Get a TLS variable address + * + * Calls to this automatically inserted by the compiler.
+ */ +extern "C" +void* __tls_get_addr(const __tls_linker_tuple* tuple) +{ + task_t* task = nullptr; +#ifdef __HOSTBOOT_RUNTIME + task = &__tls_task_struct; /* the single global TLS area used at runtime */ +#else + // Get the task_t pointer from register 13. + asm volatile("mr %0, 13" : "=r"(task)); +#endif + + auto tls_info = reinterpret_cast<__tls_thread_info*>(task->tls_context); + + // If: + // - tls_info is nullptr. + // - tls count is not at least as large as this module id. + // - tls[module] is nullptr + // Then: module blob needs to be allocated. + if ((tls_info == nullptr) || + (tls_info->count <= tuple->module) || + (tls_info->blobs[tuple->module] == nullptr)) + { + + // If there isn't room for the module's blob, we need to allocate it. + if ((tls_info == nullptr) || (tls_info->count <= tuple->module)) + { + decltype(__tls_thread_info::count) old_size = 0; + auto new_size = sizeof(__tls_thread_info) + + sizeof(void*)*(tuple->module+1); + + // Allocate or reallocate the tls info. + if (tls_info == nullptr) + { + old_size = 0; + tls_info = reinterpret_cast<decltype(tls_info)>( + malloc(new_size)); + memset(&tls_info->dtors, '\0', sizeof(tls_info->dtors)); + } + else + { + old_size = tls_info->count; + tls_info = reinterpret_cast<decltype(tls_info)>( + realloc(tls_info, new_size)); + } + + // Clear the newly allocated area and update the count. + memset(&tls_info->blobs[old_size], '\0', new_size - + (sizeof(__tls_thread_info) + sizeof(void*)*old_size)); + tls_info->count = tuple->module+1; + + // save into task struct. + task->tls_context = tls_info; + + } + + // Allocate and copy TLS blob. + mutex_lock(&__tls_mutex); + { + auto module = __tls_get_module(tuple); + auto blob = tls_info->blobs[tuple->module] = malloc(module->size); + memcpy(blob, module->sect_addr, module->size); + } + mutex_unlock(&__tls_mutex); + + } + + // Return the offset of the TLS variable from this module's blob.
+ return &reinterpret_cast<uint8_t*>(tls_info->blobs[tuple->module]) + [tuple->offset+VFS_PPC64_DTPREL_OFFSET]; +} + +/** Register a module's __tls_start_address / __tls_end_address. + * + * Called by init() in module_init. + */ +void __tls_register(void* s, void* e) +{ + if (s == e) + return; + + __tls_module m = { s, ((size_t)e) - ((size_t)s) }; + + mutex_lock(&__tls_mutex); + { + __tls_pending_modules.push_back(m); + } + mutex_unlock(&__tls_mutex); +} + +/** Clean up registration on unload. + * + * Called by fini() in module_init. + */ +void __tls_unregister(void* s, void* e) +{ + if (s == e) + return; + + using std::remove_if; + + mutex_lock(&__tls_mutex); + { + auto& v = __tls_pending_modules; + v.erase(remove_if(v.begin(), v.end(), + [s](const auto& i){ return i.sect_addr == s; }), + v.end()); + } + mutex_unlock(&__tls_mutex); +} + +/** Clean up TLS data for a task. + * + * Called by task_end_stub. + */ +extern "C" +void __tls_cleanup(__tls_thread_info* info) +{ + // Call TLS destructors. + while(auto d = info->dtors.pop()) + { + d->dtor(d->arg); + free(d); + } + + // Free TLS blobs. + decltype(__tls_thread_info::count) i = 0; + while(i < info->count) + { + free(info->blobs[i]); + ++i; + } + free(info); +} + +/** Register a C++ dtor for TLS data. + * + * Automatically called by compiler when a TLS variable has a constructor / + * destructor. + */ +extern "C" +int __cxa_thread_atexit(void (*dtor)(void*), void* arg, void* dso) +{ + task_t* task = nullptr; +#ifdef __HOSTBOOT_RUNTIME + task = &__tls_task_struct; /* the single global TLS area used at runtime */ +#else + // Get the task_t pointer from register 13. + asm volatile("mr %0, 13" : "=r"(task)); +#endif + + // Get tls_info from task_t or allocate a new one. + auto tls_info = reinterpret_cast<__tls_thread_info*>(task->tls_context); + if (nullptr == tls_info) + { + task->tls_context = tls_info = + reinterpret_cast<decltype(tls_info)>( + calloc(1, sizeof(decltype(*tls_info)))); + } + + // Insert a new dtor registration.
+ auto dtor_info = reinterpret_cast<__tls_dtor*>(malloc(sizeof(__tls_dtor))); + dtor_info->dtor = dtor; + dtor_info->arg = arg; + tls_info->dtors.push(dtor_info); + + return 0; +} diff --git a/src/makefile b/src/makefile index 744b575ac..0a6adae35 100644 --- a/src/makefile +++ b/src/makefile @@ -52,6 +52,7 @@ BASE_OBJECTS += cxxtest_data.o BASE_OBJECTS += sprintf.o BASE_OBJECTS += crc32.o BASE_OBJECTS += utilmisc.o +BASE_OBJECTS += tls.o ifdef HOSTBOOT_PROFILE BASE_OBJECTS += gcov.o diff --git a/src/usr/module_init.C b/src/usr/module_init.C index b07ace0c2..3e9a25e58 100644 --- a/src/usr/module_init.C +++ b/src/usr/module_init.C @@ -5,7 +5,9 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* COPYRIGHT International Business Machines Corp. 2011,2014 */ +/* Contributors Listed Below - COPYRIGHT 2011,2015 */ +/* [+] International Business Machines Corp. */ +/* */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ @@ -21,6 +23,8 @@ /* */ /* IBM_PROLOG_END_TAG */ void call_dtors(void * i_dso_handle); +void __tls_register(void * tls_start, void * tls_end); +void __tls_unregister(void * tls_start, void * tls_end); // This identifies the module void* __dso_handle = (void*) &__dso_handle; @@ -28,6 +32,11 @@ void* __dso_handle = (void*) &__dso_handle; extern "C" void _init(void*) { + // Register thread-local storage. + extern void* tls_start_address; + extern void* tls_end_address; + __tls_register(&tls_start_address, &tls_end_address); + // Call default constructors for any static objects. 
extern void (*ctor_start_address)(); extern void (*ctor_end_address)(); @@ -43,6 +52,10 @@ extern "C" void _fini(void) { call_dtors(__dso_handle); + + extern void* tls_start_address; + extern void* tls_end_address; + __tls_unregister(&tls_start_address, &tls_end_address); } diff --git a/src/usr/testcore/lib/tls.H b/src/usr/testcore/lib/tls.H new file mode 100644 index 000000000..84f7906a0 --- /dev/null +++ b/src/usr/testcore/lib/tls.H @@ -0,0 +1,75 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/usr/testcore/lib/tls.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. 
*/ +/* */ +/* IBM_PROLOG_END_TAG */ +#ifndef __LIB_TLS_H +#define __LIB_TLS_H + +#include +#include + +namespace __tls_test +{ + thread_local size_t foobar = 0xabcd1234; /* initial value every task is expected to observe */ + + void* test_tls(void* unused) /* task entry point; the argument is not used */ + { + decltype(foobar) rc; + + rc = foobar; + if(rc != 0xabcd1234) + { + TS_FAIL("TLS not initialized correctly: %ld", rc); + } + + task_yield(); /* NOTE(review): presumably lets the sibling test tasks run here -- confirm */ + + foobar++; /* the check below expects exactly one increment to be visible to this task */ + + task_yield(); + + rc = foobar; + if(rc != 0xabcd1235) + { + TS_FAIL("TLS increment not operating correctly: %ld", rc); + } + + return nullptr; + } +}; + +class LibcTlsTest : public CxxTest::TestSuite +{ + public: + void testTls() /* creates three tasks running test_tls, then waits on each tid */ + { + auto t1 = task_create(__tls_test::test_tls, nullptr); + auto t2 = task_create(__tls_test::test_tls, nullptr); + auto t3 = task_create(__tls_test::test_tls, nullptr); + task_wait_tid(t1, nullptr, nullptr); + task_wait_tid(t2, nullptr, nullptr); + task_wait_tid(t3, nullptr, nullptr); + } + +}; +#endif -- cgit v1.2.1