Diffstat (limited to 'openmp/tools/archer/ompt-tsan.cpp')
-rw-r--r--  openmp/tools/archer/ompt-tsan.cpp | 904
1 file changed, 904 insertions, 0 deletions
diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp
new file mode 100644
index 00000000000..552edf3af32
--- /dev/null
+++ b/openmp/tools/archer/ompt-tsan.cpp
@@ -0,0 +1,904 @@
+/*
+ * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <inttypes.h>
+#include <iostream>
+#include <list>
+#include <mutex>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if (defined __APPLE__ && defined __MACH__)
+#include <dlfcn.h>
+#endif
+
+#include <sys/resource.h>
+#include "omp-tools.h"
+
+static int runOnTsan;
+static int hasReductionCallback;
+
+class ArcherFlags {
+public:
+#if (LLVM_VERSION) >= 40
+  int flush_shadow;
+#endif
+  int print_max_rss;
+  int verbose;
+  int enabled;
+
+  ArcherFlags(const char *env)
+      :
+#if (LLVM_VERSION) >= 40
+        flush_shadow(0),
+#endif
+        print_max_rss(0), verbose(0), enabled(1) {
+    if (env) {
+      std::vector<std::string> tokens;
+      std::string token;
+      std::string str(env);
+      std::istringstream iss(str);
+      while (std::getline(iss, token, ' '))
+        tokens.push_back(token);
+
+      for (std::vector<std::string>::iterator it = tokens.begin();
+           it != tokens.end(); ++it) {
+#if (LLVM_VERSION) >= 40
+        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
+          continue;
+#endif
+        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
+          continue;
+        if (sscanf(it->c_str(), "verbose=%d", &verbose))
+          continue;
+        if (sscanf(it->c_str(), "enable=%d", &enabled))
+          continue;
+        std::cerr << "Illegal value for ARCHER_OPTIONS variable: " << token
+                  << std::endl;
+      }
+    }
+  }
+};
+
+#if (LLVM_VERSION) >= 40
+extern "C" {
+int __attribute__((weak)) __archer_get_omp_status();
+void __attribute__((weak)) __tsan_flush_memory() {}
+}
+#endif
+ArcherFlags *archer_flags;
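For reference, ARCHER_OPTIONS is parsed as space-separated key=value tokens. A minimal sketch of how the flags above are consumed, assuming the ArcherFlags class from this file (the option string and the use of POSIX setenv are illustrative, not part of the commit):

    #include <cassert>
    #include <cstdlib>

    int main() {
      // Hypothetical option string; keys match the sscanf patterns above.
      setenv("ARCHER_OPTIONS", "verbose=1 print_max_rss=1", 1);
      ArcherFlags flags(getenv("ARCHER_OPTIONS"));
      assert(flags.verbose == 1 && flags.print_max_rss == 1);
      return 0;
    }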
+extern "C" { +#if (defined __APPLE__ && defined __MACH__) +static void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv) { + void (*fptr)(const char *, int, const volatile void *); + + fptr = (void (*)(const char *, int, const volatile void *))dlsym( + RTLD_DEFAULT, "AnnotateHappensAfter"); + (*fptr)(file, line, cv); +} +static void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv) { + void (*fptr)(const char *, int, const volatile void *); + + fptr = (void (*)(const char *, int, const volatile void *))dlsym( + RTLD_DEFAULT, "AnnotateHappensBefore"); + (*fptr)(file, line, cv); +} +static void AnnotateIgnoreWritesBegin(const char *file, int line) { + void (*fptr)(const char *, int); + + fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT, + "AnnotateIgnoreWritesBegin"); + (*fptr)(file, line); +} +static void AnnotateIgnoreWritesEnd(const char *file, int line) { + void (*fptr)(const char *, int); + + fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT, + "AnnotateIgnoreWritesEnd"); + (*fptr)(file, line); +} +static void AnnotateNewMemory(const char *file, int line, + const volatile void *cv, size_t size) { + void (*fptr)(const char *, int, const volatile void *, size_t); + + fptr = (void (*)(const char *, int, const volatile void *, size_t))dlsym( + RTLD_DEFAULT, "AnnotateNewMemory"); + (*fptr)(file, line, cv, size); +} +static int RunningOnValgrind() { + int (*fptr)(); + + fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind"); + if (fptr && fptr != RunningOnValgrind) + runOnTsan = 0; + return 0; +} +#else +void __attribute__((weak)) +AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {} +void __attribute__((weak)) +AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {} +void __attribute__((weak)) +AnnotateIgnoreWritesBegin(const char *file, int line) {} +void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) { +} +void __attribute__((weak)) +AnnotateNewMemory(const char *file, int line, const volatile void *cv, + size_t size) {} +int __attribute__((weak)) RunningOnValgrind() { + runOnTsan = 0; + return 0; +} +#endif +} + +// This marker is used to define a happens-before arc. The race detector will +// infer an arc from the begin to the end when they share the same pointer +// argument. +#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv) + +// This marker defines the destination of a happens-before arc. +#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv) + +// Ignore any races on writes between here and the next TsanIgnoreWritesEnd. +#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + +// Resume checking for racy writes. +#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + +// We don't really delete the clock for now +#define TsanDeleteClock(cv) + +// newMemory +#define TsanNewMemory(addr, size) \ + AnnotateNewMemory(__FILE__, __LINE__, addr, size) +#define TsanFreeMemory(addr, size) \ + AnnotateNewMemory(__FILE__, __LINE__, addr, size) +#endif + +/// Required OMPT inquiry functions. +static ompt_get_parallel_info_t ompt_get_parallel_info; +static ompt_get_thread_data_t ompt_get_thread_data; + +typedef uint64_t ompt_tsan_clockid; + +static uint64_t my_next_id() { + static uint64_t ID = 0; + uint64_t ret = __sync_fetch_and_add(&ID, 1); + return ret; +} + +// Data structure to provide a threadsafe pool of reusable objects. 
+
+/// Required OMPT inquiry functions.
+static ompt_get_parallel_info_t ompt_get_parallel_info;
+static ompt_get_thread_data_t ompt_get_thread_data;
+
+typedef uint64_t ompt_tsan_clockid;
+
+static uint64_t my_next_id() {
+  static uint64_t ID = 0;
+  uint64_t ret = __sync_fetch_and_add(&ID, 1);
+  return ret;
+}
+
+// Data structure to provide a threadsafe pool of reusable objects.
+// DataPool<Type of objects, Size of block allocation>
+template <typename T, int N> struct DataPool {
+  std::mutex DPMutex;
+  std::stack<T *> DataPointer;
+  std::list<void *> memory;
+  int total;
+
+  void newDatas() {
+    // Prefix the data with a pointer to 'this'; this allows returning the
+    // memory to this pool without explicitly knowing the source.
+    //
+    // To reduce lock contention, we use thread-local DataPools, but data
+    // objects may move to other threads. The strategy is to get objects from
+    // the local pool; only if an object has moved to another thread do we pay
+    // a penalty on release (returnData). In the "single producer" pattern, a
+    // single thread creates tasks that are executed by other threads; the
+    // master thread has a high demand for TaskData, so objects are returned
+    // to its pool after use.
+    struct pooldata {
+      DataPool<T, N> *dp;
+      T data;
+    };
+    // We allocate the memory without initializing it because we must not
+    // call constructors here; therefore use malloc.
+    pooldata *datas = (pooldata *)malloc(sizeof(pooldata) * N);
+    memory.push_back(datas);
+    for (int i = 0; i < N; i++) {
+      datas[i].dp = this;
+      DataPointer.push(&(datas[i].data));
+    }
+    total += N;
+  }
+
+  T *getData() {
+    T *ret;
+    DPMutex.lock();
+    if (DataPointer.empty())
+      newDatas();
+    ret = DataPointer.top();
+    DataPointer.pop();
+    DPMutex.unlock();
+    return ret;
+  }
+
+  void returnData(T *data) {
+    DPMutex.lock();
+    DataPointer.push(data);
+    DPMutex.unlock();
+  }
+
+  void getDatas(int n, T **datas) {
+    DPMutex.lock();
+    for (int i = 0; i < n; i++) {
+      if (DataPointer.empty())
+        newDatas();
+      datas[i] = DataPointer.top();
+      DataPointer.pop();
+    }
+    DPMutex.unlock();
+  }
+
+  void returnDatas(int n, T **datas) {
+    DPMutex.lock();
+    for (int i = 0; i < n; i++) {
+      DataPointer.push(datas[i]);
+    }
+    DPMutex.unlock();
+  }
+
+  DataPool() : DPMutex(), DataPointer(), total(0) {}
+
+  ~DataPool() {
+    // We assume all memory has been returned by the time the thread finishes
+    // and the destructor is called.
+    for (auto i : memory)
+      if (i)
+        free(i);
+  }
+};
+
+// This function returns the data to the originating DataPool.
+// A pointer to the originating DataPool is stored just before the actual data.
+template <typename T, int N> static void retData(void *data) {
+  ((DataPool<T, N> **)data)[-1]->returnData((T *)data);
+}
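The pointer prefix is what lets retData find the owner without any lookup: each handed-out object sits directly behind a pointer to its owning pool. A small illustrative sketch (the element type is invented; it assumes, as the code above does, that no padding separates the two pooldata members):

    DataPool<int, 4> pool;     // thread-local in the real tool
    int *obj = pool.getData(); // &pooldata::data of some slot
    // One pointer-width before the object lies the owning pool:
    //   ((DataPool<int, 4> **)obj)[-1] == &pool
    retData<int, 4>(obj);      // returns obj to 'pool'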
+
+struct ParallelData;
+__thread DataPool<ParallelData, 4> *pdp;
+
+/// Data structure to store additional information for parallel regions.
+struct ParallelData {
+
+  // Parallel fork is just another barrier: use Barrier[1].
+
+  /// Two addresses for relationships with barriers.
+  ompt_tsan_clockid Barrier[2];
+
+  void *GetParallelPtr() { return &(Barrier[1]); }
+
+  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }
+
+  ~ParallelData() {
+    TsanDeleteClock(&(Barrier[0]));
+    TsanDeleteClock(&(Barrier[1]));
+  }
+  // Overload new/delete to use DataPool for memory management.
+  void *operator new(size_t size) { return pdp->getData(); }
+  void operator delete(void *p, size_t) { retData<ParallelData, 4>(p); }
+};
+
+static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
+  return reinterpret_cast<ParallelData *>(parallel_data->ptr);
+}
+
+struct Taskgroup;
+__thread DataPool<Taskgroup, 4> *tgp;
+
+/// Data structure to support stacking of taskgroups and allow synchronization.
+struct Taskgroup {
+  /// Its address is used for relationships of the taskgroup's task set.
+  ompt_tsan_clockid Ptr;
+
+  /// Reference to the parent taskgroup.
+  Taskgroup *Parent;
+
+  Taskgroup(Taskgroup *Parent) : Parent(Parent) {}
+  ~Taskgroup() { TsanDeleteClock(&Ptr); }
+
+  void *GetPtr() { return &Ptr; }
+  // Overload new/delete to use DataPool for memory management.
+  void *operator new(size_t size) { return tgp->getData(); }
+  void operator delete(void *p, size_t) { retData<Taskgroup, 4>(p); }
+};
+
+struct TaskData;
+__thread DataPool<TaskData, 4> *tdp;
+
+/// Data structure to store additional information for tasks.
+struct TaskData {
+  /// Its address is used for relationships of this task.
+  ompt_tsan_clockid Task;
+
+  /// Child tasks use its address to declare a relationship to a taskwait in
+  /// this task.
+  ompt_tsan_clockid Taskwait;
+
+  /// Whether this task is currently executing a barrier.
+  bool InBarrier;
+
+  /// Whether this task is an included task.
+  bool Included;
+
+  /// Index of which barrier to use next.
+  char BarrierIndex;
+
+  /// Count how often this structure has been put into child tasks + 1.
+  std::atomic_int RefCount;
+
+  /// Reference to the parent that created this task.
+  TaskData *Parent;
+
+  /// Reference to the implicit task in the stack above this task.
+  TaskData *ImplicitTask;
+
+  /// Reference to the team of this task.
+  ParallelData *Team;
+
+  /// Reference to the current taskgroup that this task either belongs to or
+  /// that it just created.
+  Taskgroup *TaskGroup;
+
+  /// Dependency information for this task.
+  ompt_dependence_t *Dependencies;
+
+  /// Number of dependency entries.
+  unsigned DependencyCount;
+
+  void *PrivateData;
+  size_t PrivateDataSize;
+
+  int execution;
+  int freed;
+
+  TaskData(TaskData *Parent)
+      : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
+        Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team),
+        TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) {
+    if (Parent != nullptr) {
+      Parent->RefCount++;
+      // Copy over the pointer to the taskgroup. This task may set up its own
+      // stack but for now belongs to its parent's taskgroup.
+      TaskGroup = Parent->TaskGroup;
+    }
+  }
+
+  TaskData(ParallelData *Team = nullptr)
+      : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
+        Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr),
+        DependencyCount(0), execution(1), freed(0) {}
+
+  ~TaskData() {
+    TsanDeleteClock(&Task);
+    TsanDeleteClock(&Taskwait);
+  }
+
+  void *GetTaskPtr() { return &Task; }
+
+  void *GetTaskwaitPtr() { return &Taskwait; }
+  // Overload new/delete to use DataPool for memory management.
+  void *operator new(size_t size) { return tdp->getData(); }
+  void operator delete(void *p, size_t) { retData<TaskData, 4>(p); }
+};
+
+static inline TaskData *ToTaskData(ompt_data_t *task_data) {
+  return reinterpret_cast<TaskData *>(task_data->ptr);
+}
+
+static inline void *ToInAddr(void *OutAddr) {
+  // FIXME: This will give false negatives when a second variable lies
+  // directly behind a variable that only has a width of 1 byte.
+  // Another approach would be to "negate" the address or to flip the
+  // first bit...
+  return reinterpret_cast<char *>(OutAddr) + 1;
+}
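Together with the dependency handling in ompt_tsan_task_schedule below, this yields two synchronization addresses per depend variable v: &v orders a task against prior writers, while ToInAddr(&v) orders it against prior readers. A comment-style summary of the arcs the tool emits (see task_schedule for the actual code):

    // For each dependence on variable v, as emitted in task_schedule:
    //   task start,    any type:  TsanHappensAfter(&v)            wait for prior writers
    //   task start,    out/inout: TsanHappensAfter(ToInAddr(&v))  also wait for prior readers
    //   task complete, any type:  TsanHappensBefore(ToInAddr(&v)) publish as reader
    //   task complete, out/inout: TsanHappensBefore(&v)           also publish as writer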
+
+/// Store a mutex for each wait_id to resolve race conditions with callbacks.
+std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
+std::mutex LocksMutex;
+
+static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
+                                   ompt_data_t *thread_data) {
+  pdp = new DataPool<ParallelData, 4>;
+  TsanNewMemory(pdp, sizeof(pdp));
+  tgp = new DataPool<Taskgroup, 4>;
+  TsanNewMemory(tgp, sizeof(tgp));
+  tdp = new DataPool<TaskData, 4>;
+  TsanNewMemory(tdp, sizeof(tdp));
+  thread_data->value = my_next_id();
+}
+
+static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
+  delete pdp;
+  delete tgp;
+  delete tdp;
+}
+
+/// OMPT event callbacks for handling parallel regions.
+
+static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
+                                     const ompt_frame_t *parent_task_frame,
+                                     ompt_data_t *parallel_data,
+                                     uint32_t requested_team_size, int flag,
+                                     const void *codeptr_ra) {
+  ParallelData *Data = new ParallelData;
+  parallel_data->ptr = Data;
+
+  TsanHappensBefore(Data->GetParallelPtr());
+}
+
+static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
+                                   ompt_data_t *task_data, int flag,
+                                   const void *codeptr_ra) {
+  ParallelData *Data = ToParallelData(parallel_data);
+  TsanHappensAfter(Data->GetBarrierPtr(0));
+  TsanHappensAfter(Data->GetBarrierPtr(1));
+
+  delete Data;
+
+#if (LLVM_VERSION >= 40)
+  if (&__archer_get_omp_status) {
+    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
+      __tsan_flush_memory();
+  }
+#endif
+}
+
+static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
+                                    ompt_data_t *parallel_data,
+                                    ompt_data_t *task_data,
+                                    unsigned int team_size,
+                                    unsigned int thread_num, int type) {
+  switch (endpoint) {
+  case ompt_scope_begin:
+    task_data->ptr = new TaskData(ToParallelData(parallel_data));
+    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
+    break;
+  case ompt_scope_end: {
+    TaskData *Data = ToTaskData(task_data);
+    assert(Data->freed == 0 &&
+           "Implicit task end should only be called once!");
+    Data->freed = 1;
+    assert(Data->RefCount == 1 &&
+           "All tasks should have finished at the implicit barrier!");
+    delete Data;
+    break;
+  }
+  }
+}
+
+static void ompt_tsan_sync_region(ompt_sync_region_t kind,
+                                  ompt_scope_endpoint_t endpoint,
+                                  ompt_data_t *parallel_data,
+                                  ompt_data_t *task_data,
+                                  const void *codeptr_ra) {
+  TaskData *Data = ToTaskData(task_data);
+  switch (endpoint) {
+  case ompt_scope_begin:
+    switch (kind) {
+    case ompt_sync_region_barrier_implementation:
+    case ompt_sync_region_barrier_implicit:
+    case ompt_sync_region_barrier_explicit:
+    case ompt_sync_region_barrier: {
+      char BarrierIndex = Data->BarrierIndex;
+      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));
+
+      if (hasReductionCallback < ompt_set_always) {
+        // We ignore writes inside the barrier. These would occur during
+        // 1. reductions performed by the runtime, which are guaranteed to be
+        //    race-free, or
+        // 2. execution of another task.
+        // For the latter case we will re-enable tracking in task_switch.
+        Data->InBarrier = true;
+        TsanIgnoreWritesBegin();
+      }
+
+      break;
+    }
+
+    case ompt_sync_region_taskwait:
+      break;
+
+    case ompt_sync_region_taskgroup:
+      Data->TaskGroup = new Taskgroup(Data->TaskGroup);
+      break;
+
+    default:
+      break;
+    }
+    break;
+  case ompt_scope_end:
+    switch (kind) {
+    case ompt_sync_region_barrier_implementation:
+    case ompt_sync_region_barrier_implicit:
+    case ompt_sync_region_barrier_explicit:
+    case ompt_sync_region_barrier: {
+      if (hasReductionCallback < ompt_set_always) {
+        // We want to track writes after the barrier again.
+        Data->InBarrier = false;
+        TsanIgnoreWritesEnd();
+      }
+
+      char BarrierIndex = Data->BarrierIndex;
+      // The barrier will end after it has been entered by all threads.
+      if (parallel_data)
+        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));
+
+      // It is not guaranteed that all threads have exited this barrier before
+      // we enter the next one, so we use a different address for the next
+      // barrier. We are, however, guaranteed that this barrier is finished
+      // by the time we exit the next one, so we can then reuse the first
+      // address.
+      Data->BarrierIndex = (BarrierIndex + 1) % 2;
+      break;
+    }
+
+    case ompt_sync_region_taskwait: {
+      if (Data->execution > 1)
+        TsanHappensAfter(Data->GetTaskwaitPtr());
+      break;
+    }
+
+    case ompt_sync_region_taskgroup: {
+      assert(Data->TaskGroup != nullptr &&
+             "Should have at least one taskgroup!");
+
+      TsanHappensAfter(Data->TaskGroup->GetPtr());
+
+      // Delete this allocated taskgroup; all descendant tasks are finished by
+      // now.
+      Taskgroup *Parent = Data->TaskGroup->Parent;
+      delete Data->TaskGroup;
+      Data->TaskGroup = Parent;
+      break;
+    }
+
+    default:
+      break;
+    }
+    break;
+  }
+}
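The two-element Barrier array thus acts as a double buffer: consecutive barriers of a team alternate between the two clock addresses, which is safe because at most two barriers can overlap in the way described above. A sketch of the alternation for a team that hits three barriers (names as defined above):

    // Per-task BarrierIndex, per-team Barrier[2]:
    //   barrier #1: HB/HA on Team->Barrier[0], then BarrierIndex = 1
    //   barrier #2: HB/HA on Team->Barrier[1], then BarrierIndex = 0
    //   barrier #3: Team->Barrier[0] again -- barrier #1 is guaranteed
    //               finished by now, so reusing its address is sound.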
+
+static void ompt_tsan_reduction(ompt_sync_region_t kind,
+                                ompt_scope_endpoint_t endpoint,
+                                ompt_data_t *parallel_data,
+                                ompt_data_t *task_data,
+                                const void *codeptr_ra) {
+  switch (endpoint) {
+  case ompt_scope_begin:
+    switch (kind) {
+    case ompt_sync_region_reduction:
+      TsanIgnoreWritesBegin();
+      break;
+    default:
+      break;
+    }
+    break;
+  case ompt_scope_end:
+    switch (kind) {
+    case ompt_sync_region_reduction:
+      TsanIgnoreWritesEnd();
+      break;
+    default:
+      break;
+    }
+    break;
+  }
+}
+
+/// OMPT event callbacks for handling tasks.
+
+static void ompt_tsan_task_create(
+    ompt_data_t *parent_task_data,    /* id of parent task */
+    const ompt_frame_t *parent_frame, /* frame data for parent task */
+    ompt_data_t *new_task_data,       /* id of created task */
+    int type, int has_dependences,
+    const void *codeptr_ra) /* pointer to outlined function */
+{
+  TaskData *Data;
+  assert(new_task_data->ptr == NULL &&
+         "Task data should be initialized to NULL");
+  if (type & ompt_task_initial) {
+    ompt_data_t *parallel_data;
+    int team_size = 1;
+    ompt_get_parallel_info(0, &parallel_data, &team_size);
+    ParallelData *PData = new ParallelData;
+    parallel_data->ptr = PData;
+
+    Data = new TaskData(PData);
+    new_task_data->ptr = Data;
+  } else if (type & ompt_task_undeferred) {
+    Data = new TaskData(ToTaskData(parent_task_data));
+    new_task_data->ptr = Data;
+    Data->Included = true;
+  } else if (type & ompt_task_explicit || type & ompt_task_target) {
+    Data = new TaskData(ToTaskData(parent_task_data));
+    new_task_data->ptr = Data;
+
+    // Use the newly created address. We cannot use a single address from the
+    // parent because that would declare wrong relationships with other
+    // sibling tasks that may be created before this task is started!
+    TsanHappensBefore(Data->GetTaskPtr());
+    ToTaskData(parent_task_data)->execution++;
+  }
+}
+
+static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
+                                    ompt_task_status_t prior_task_status,
+                                    ompt_data_t *second_task_data) {
+  TaskData *FromTask = ToTaskData(first_task_data);
+  TaskData *ToTask = ToTaskData(second_task_data);
+
+  if (ToTask->Included && prior_task_status != ompt_task_complete)
+    return; // No further synchronization when beginning an included task.
+  if (FromTask->Included && prior_task_status == ompt_task_complete) {
+    // Just delete the task:
+    while (FromTask != nullptr && --FromTask->RefCount == 0) {
+      TaskData *Parent = FromTask->Parent;
+      if (FromTask->DependencyCount > 0) {
+        delete[] FromTask->Dependencies;
+      }
+      delete FromTask;
+      FromTask = Parent;
+    }
+    return;
+  }
+
+  if (ToTask->execution == 0) {
+    ToTask->execution++;
+    // 1. The task begins execution after it has been created.
+    TsanHappensAfter(ToTask->GetTaskPtr());
+    for (unsigned i = 0; i < ToTask->DependencyCount; i++) {
+      ompt_dependence_t *Dependency = &ToTask->Dependencies[i];
+
+      TsanHappensAfter(Dependency->variable.ptr);
+      // in and inout dependencies are also blocked by prior in dependencies!
+      if (Dependency->dependence_type == ompt_dependence_type_out ||
+          Dependency->dependence_type == ompt_dependence_type_inout) {
+        TsanHappensAfter(ToInAddr(Dependency->variable.ptr));
+      }
+    }
+  } else {
+    // 2. The task resumes after it has been switched away.
+    TsanHappensAfter(ToTask->GetTaskPtr());
+  }
+
+  if (prior_task_status != ompt_task_complete) {
+    ToTask->ImplicitTask = FromTask->ImplicitTask;
+    assert(ToTask->ImplicitTask != NULL &&
+           "A task belongs to a team and has an implicit task on the stack");
+  }
+
+  // The task may be resumed at a later point in time.
+  TsanHappensBefore(FromTask->GetTaskPtr());
+
+  if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
+    // We want to ignore writes in the runtime code during barriers,
+    // but not when executing tasks with user code!
+    TsanIgnoreWritesEnd();
+  }
+
+  if (prior_task_status == ompt_task_complete) { // task finished
+
+    // The task finishes before a barrier in the surrounding parallel
+    // region ...
+    ParallelData *PData = FromTask->Team;
+    TsanHappensBefore(
+        PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));
+
+    // ... and before an eventual taskwait by the parent thread.
+    TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());
+
+    if (FromTask->TaskGroup != nullptr) {
+      // This task is part of a taskgroup, so it finishes before the
+      // corresponding taskgroup_end.
+      TsanHappensBefore(FromTask->TaskGroup->GetPtr());
+    }
+    for (unsigned i = 0; i < FromTask->DependencyCount; i++) {
+      ompt_dependence_t *Dependency = &FromTask->Dependencies[i];
+
+      // in dependencies block following inout and out dependencies!
+      TsanHappensBefore(ToInAddr(Dependency->variable.ptr));
+      if (Dependency->dependence_type == ompt_dependence_type_out ||
+          Dependency->dependence_type == ompt_dependence_type_inout) {
+        TsanHappensBefore(Dependency->variable.ptr);
+      }
+    }
+    while (FromTask != nullptr && --FromTask->RefCount == 0) {
+      TaskData *Parent = FromTask->Parent;
+      if (FromTask->DependencyCount > 0) {
+        delete[] FromTask->Dependencies;
+      }
+      delete FromTask;
+      FromTask = Parent;
+    }
+  }
+  if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
+    // We re-enter runtime code which currently performs a barrier.
+    TsanIgnoreWritesBegin();
+  }
+}
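A completing task therefore publishes several happens-before edges at once; summarized from the code above:

    // On ompt_task_complete for task T:
    //   HB(T->Team->Barrier[index])  finishes before the next team barrier
    //   HB(T->Parent->Taskwait)      ... and before a taskwait in the parent
    //   HB(T->TaskGroup->Ptr)        ... and before the enclosing taskgroup_end
    //   HB edges per dependence      (see the two-address scheme at ToInAddr)
    // Afterwards the RefCount chain frees T and any finished ancestors.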
+
+static void ompt_tsan_dependences(ompt_data_t *task_data,
+                                  const ompt_dependence_t *deps, int ndeps) {
+  if (ndeps > 0) {
+    // Copy the data to use it in task_switch and task_end.
+    TaskData *Data = ToTaskData(task_data);
+    Data->Dependencies = new ompt_dependence_t[ndeps];
+    std::memcpy(Data->Dependencies, deps, sizeof(ompt_dependence_t) * ndeps);
+    Data->DependencyCount = ndeps;
+
+    // This callback is executed before this task is first started.
+    TsanHappensBefore(Data->GetTaskPtr());
+  }
+}
+
+/// OMPT event callbacks for handling locking.
+static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
+                                     const void *codeptr_ra) {
+
+  // Acquire our own lock to make sure that
+  // 1. the previous release has finished.
+  // 2. the next acquire doesn't start before we have finished our release.
+  LocksMutex.lock();
+  std::mutex &Lock = Locks[wait_id];
+  LocksMutex.unlock();
+
+  Lock.lock();
+  TsanHappensAfter(&Lock);
+}
+
+static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
+                                     const void *codeptr_ra) {
+  LocksMutex.lock();
+  std::mutex &Lock = Locks[wait_id];
+  LocksMutex.unlock();
+  TsanHappensBefore(&Lock);
+
+  Lock.unlock();
+}
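The per-wait_id shadow mutex mirrors the user's lock so that the TsanHappensBefore in the release callback is always observed by the matching TsanHappensAfter in the next acquire, even if the two callbacks themselves race. A sketch of the resulting event order for one OpenMP lock (the wait_id value is invented):

    // Thread A: omp_unset_lock -> mutex_released(wait_id = 7):
    //             TsanHappensBefore(&Locks[7]); Locks[7].unlock();
    // Thread B: omp_set_lock   -> mutex_acquired(wait_id = 7):
    //             Locks[7].lock(); TsanHappensAfter(&Locks[7]);
    // Holding Locks[7] guarantees B's HappensAfter runs after A's HappensBefore.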
+
+// callback, signature, variable to store result, required support level
+#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                   \
+  do {                                                                        \
+    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;               \
+    result = ompt_set_callback(ompt_callback_##event,                         \
+                               (ompt_callback_t)tsan_##event);                \
+    if (result < level)                                                       \
+      printf("Registered callback '" #event "' is not supported at " #level   \
+             " (%i)\n",                                                       \
+             result);                                                         \
+  } while (0)
+
+#define SET_CALLBACK_T(event, type)                                           \
+  do {                                                                        \
+    int res;                                                                  \
+    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);               \
+  } while (0)
+
+#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
+
+static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
+                                ompt_data_t *tool_data) {
+  const char *options = getenv("ARCHER_OPTIONS");
+  archer_flags = new ArcherFlags(options);
+
+  ompt_set_callback_t ompt_set_callback =
+      (ompt_set_callback_t)lookup("ompt_set_callback");
+  if (ompt_set_callback == NULL) {
+    std::cerr << "Could not set callback, exiting..." << std::endl;
+    std::exit(1);
+  }
+  ompt_get_parallel_info =
+      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
+  ompt_get_thread_data =
+      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+
+  if (ompt_get_parallel_info == NULL) {
+    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
+                    "exiting...\n");
+    exit(1);
+  }
+
+  SET_CALLBACK(thread_begin);
+  SET_CALLBACK(thread_end);
+  SET_CALLBACK(parallel_begin);
+  SET_CALLBACK(implicit_task);
+  SET_CALLBACK(sync_region);
+  SET_CALLBACK(parallel_end);
+
+  SET_CALLBACK(task_create);
+  SET_CALLBACK(task_schedule);
+  SET_CALLBACK(dependences);
+
+  SET_CALLBACK_T(mutex_acquired, mutex);
+  SET_CALLBACK_T(mutex_released, mutex);
+  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
+                          ompt_set_never);
+  return 1; // success
+}
+
+static void ompt_tsan_finalize(ompt_data_t *tool_data) {
+  if (archer_flags->print_max_rss) {
+    struct rusage end;
+    getrusage(RUSAGE_SELF, &end);
+    printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
+  }
+
+  if (archer_flags)
+    delete archer_flags;
+}
+
+extern "C" ompt_start_tool_result_t *
+ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
+  const char *options = getenv("ARCHER_OPTIONS");
+  archer_flags = new ArcherFlags(options);
+  if (!archer_flags->enabled) {
+    if (archer_flags->verbose)
+      std::cout << "Archer disabled, stopping operation" << std::endl;
+    delete archer_flags;
+    return NULL;
+  }
+
+  static ompt_start_tool_result_t ompt_start_tool_result = {
+      &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
+  runOnTsan = 1;
+  RunningOnValgrind();
+  if (!runOnTsan) // If we are not running on TSan, give a different tool the
+                  // chance to be loaded.
+  {
+    if (archer_flags->verbose)
+      std::cout << "Archer detected OpenMP application without TSan; "
+                   "stopping operation"
+                << std::endl;
+    delete archer_flags;
+    return NULL;
+  }
+
+  if (archer_flags->verbose)
+    std::cout << "Archer detected OpenMP application with TSan, supplying "
+                 "OpenMP synchronization semantics"
+              << std::endl;
+  return &ompt_start_tool_result;
+}
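For context, a minimal racy OpenMP program of the kind this tool is meant to analyze; the file name and build line are illustrative, the usual Archer workflow being to compile with TSan instrumentation and simply run:

    // race.cpp -- built e.g. with: clang++ -fopenmp -fsanitize=thread race.cpp
    #include <cstdio>

    int main() {
      int sum = 0;
    #pragma omp parallel for
      for (int i = 0; i < 1000; i++)
        sum += i; // unsynchronized read-modify-write: reported as a data race
      std::printf("%d\n", sum);
      return 0;
    }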