Diffstat (limited to 'openmp/tools/archer/ompt-tsan.cpp')
-rw-r--r--  openmp/tools/archer/ompt-tsan.cpp | 904
1 files changed, 904 insertions, 0 deletions
diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp
new file mode 100644
index 00000000000..552edf3af32
--- /dev/null
+++ b/openmp/tools/archer/ompt-tsan.cpp
@@ -0,0 +1,904 @@
+/*
+ * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
+ */
+
+ //===----------------------------------------------------------------------===//
+ //
+ // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ // See https://llvm.org/LICENSE.txt for details.
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ //
+ //===----------------------------------------------------------------------===//
+
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <inttypes.h>
+#include <iostream>
+#include <list>
+#include <mutex>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if (defined __APPLE__ && defined __MACH__)
+#include <dlfcn.h>
+#endif
+
+#include <sys/resource.h>
+#include "omp-tools.h"
+
+static int runOnTsan;
+static int hasReductionCallback;
+
+class ArcherFlags {
+public:
+#if (LLVM_VERSION) >= 40
+ int flush_shadow;
+#endif
+ int print_max_rss;
+ int verbose;
+ int enabled;
+
+ ArcherFlags(const char *env)
+ :
+#if (LLVM_VERSION) >= 40
+ flush_shadow(0),
+#endif
+ print_max_rss(0), verbose(0), enabled(1) {
+ if (env) {
+ std::vector<std::string> tokens;
+ std::string token;
+ std::string str(env);
+ std::istringstream iss(str);
+ while (std::getline(iss, token, ' '))
+ tokens.push_back(token);
+
+ for (std::vector<std::string>::iterator it = tokens.begin();
+ it != tokens.end(); ++it) {
+#if (LLVM_VERSION) >= 40
+ if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
+ continue;
+#endif
+ if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
+ continue;
+ if (sscanf(it->c_str(), "verbose=%d", &verbose))
+ continue;
+ if (sscanf(it->c_str(), "enable=%d", &enabled))
+ continue;
+      std::cerr << "Illegal value for ARCHER_OPTIONS variable: " << token
+                << std::endl;
+ }
+ }
+ }
+};
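+
+// Usage sketch (illustrative, not part of the tool itself): ARCHER_OPTIONS is
+// parsed as a space-separated list of key=value tokens, e.g.
+//
+//   ARCHER_OPTIONS="verbose=1 print_max_rss=1" ./app
+//
+// enables verbose output and max-RSS reporting; unrecognized tokens are
+// reported to stderr.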
+
+#if (LLVM_VERSION) >= 40
+extern "C" {
+int __attribute__((weak)) __archer_get_omp_status();
+void __attribute__((weak)) __tsan_flush_memory() {}
+}
+#endif
+ArcherFlags *archer_flags;
+
+// The following definitions are pasted from "llvm/Support/Compiler.h" to
+// allow the code to be compiled with other compilers like gcc:
+
+#ifndef TsanHappensBefore
+// Thread Sanitizer is a tool that finds races in code.
+// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
+// TSan detects these exact functions by name.
+extern "C" {
+#if (defined __APPLE__ && defined __MACH__)
+static void AnnotateHappensAfter(const char *file, int line,
+ const volatile void *cv) {
+ void (*fptr)(const char *, int, const volatile void *);
+
+ fptr = (void (*)(const char *, int, const volatile void *))dlsym(
+ RTLD_DEFAULT, "AnnotateHappensAfter");
+ (*fptr)(file, line, cv);
+}
+static void AnnotateHappensBefore(const char *file, int line,
+ const volatile void *cv) {
+ void (*fptr)(const char *, int, const volatile void *);
+
+ fptr = (void (*)(const char *, int, const volatile void *))dlsym(
+ RTLD_DEFAULT, "AnnotateHappensBefore");
+ (*fptr)(file, line, cv);
+}
+static void AnnotateIgnoreWritesBegin(const char *file, int line) {
+ void (*fptr)(const char *, int);
+
+ fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
+ "AnnotateIgnoreWritesBegin");
+ (*fptr)(file, line);
+}
+static void AnnotateIgnoreWritesEnd(const char *file, int line) {
+ void (*fptr)(const char *, int);
+
+ fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
+ "AnnotateIgnoreWritesEnd");
+ (*fptr)(file, line);
+}
+static void AnnotateNewMemory(const char *file, int line,
+ const volatile void *cv, size_t size) {
+ void (*fptr)(const char *, int, const volatile void *, size_t);
+
+ fptr = (void (*)(const char *, int, const volatile void *, size_t))dlsym(
+ RTLD_DEFAULT, "AnnotateNewMemory");
+ (*fptr)(file, line, cv, size);
+}
+static int RunningOnValgrind() {
+ int (*fptr)();
+
+ fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind");
+ if (fptr && fptr != RunningOnValgrind)
+ runOnTsan = 0;
+ return 0;
+}
+#else
+void __attribute__((weak))
+AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {}
+void __attribute__((weak))
+AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {}
+void __attribute__((weak))
+AnnotateIgnoreWritesBegin(const char *file, int line) {}
+void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) {
+}
+void __attribute__((weak))
+AnnotateNewMemory(const char *file, int line, const volatile void *cv,
+ size_t size) {}
+int __attribute__((weak)) RunningOnValgrind() {
+ runOnTsan = 0;
+ return 0;
+}
+#endif
+}
+
+// This marker is used to define a happens-before arc. The race detector will
+// infer an arc from the begin to the end when they share the same pointer
+// argument.
+#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
+
+// This marker defines the destination of a happens-before arc.
+#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
+
+// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
+#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+// Resume checking for racy writes.
+#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+
+// We don't really delete the clock for now
+#define TsanDeleteClock(cv)
+
+// Annotate new memory; freed memory is annotated the same way to reset its
+// state.
+#define TsanNewMemory(addr, size) \
+ AnnotateNewMemory(__FILE__, __LINE__, addr, size)
+#define TsanFreeMemory(addr, size) \
+ AnnotateNewMemory(__FILE__, __LINE__, addr, size)
+#endif
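+
+// Sketch of how these annotations express an ordering (illustrative, not
+// Archer code): the releasing side calls TsanHappensBefore and the acquiring
+// side calls TsanHappensAfter on the same address, e.g.
+//
+//   // thread A                      // thread B
+//   data = 42;                       TsanHappensAfter(&clock);
+//   TsanHappensBefore(&clock);       use(data); // no race reported
+//
+// TSan infers an arc from the "before" to the "after" point and suppresses
+// race reports on accesses ordered by it.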
+
+/// Required OMPT inquiry functions.
+static ompt_get_parallel_info_t ompt_get_parallel_info;
+static ompt_get_thread_data_t ompt_get_thread_data;
+
+typedef uint64_t ompt_tsan_clockid;
+
+static uint64_t my_next_id() {
+ static uint64_t ID = 0;
+ uint64_t ret = __sync_fetch_and_add(&ID, 1);
+ return ret;
+}
+
+// Data structure to provide a thread-safe pool of reusable objects.
+// DataPool<Type of objects, Number of objects per block allocation>
+template <typename T, int N> struct DataPool {
+ std::mutex DPMutex;
+ std::stack<T *> DataPointer;
+ std::list<void *> memory;
+ int total;
+
+ void newDatas() {
+    // Prefix the data with a pointer to 'this', which allows returning
+    // memory to 'this' without explicitly knowing the source.
+    //
+    // To reduce lock contention, we use thread-local DataPools, but Data
+    // objects may move to other threads. The strategy is to get objects from
+    // the local pool; only if an object moved to another thread do we pay a
+    // penalty on release (returnData). In the "single producer" pattern, a
+    // single thread creates tasks that are executed by other threads; the
+    // master has a high demand for TaskData, so objects are returned right
+    // after use.
+ struct pooldata {
+ DataPool<T, N> *dp;
+ T data;
+ };
+    // We allocate the memory without initializing it since we cannot call
+    // constructors here. Therefore use malloc!
+ pooldata *datas = (pooldata *)malloc(sizeof(pooldata) * N);
+ memory.push_back(datas);
+ for (int i = 0; i < N; i++) {
+ datas[i].dp = this;
+ DataPointer.push(&(datas[i].data));
+ }
+ total += N;
+ }
+
+ T *getData() {
+ T *ret;
+ DPMutex.lock();
+ if (DataPointer.empty())
+ newDatas();
+ ret = DataPointer.top();
+ DataPointer.pop();
+ DPMutex.unlock();
+ return ret;
+ }
+
+ void returnData(T *data) {
+ DPMutex.lock();
+ DataPointer.push(data);
+ DPMutex.unlock();
+ }
+
+ void getDatas(int n, T **datas) {
+ DPMutex.lock();
+ for (int i = 0; i < n; i++) {
+ if (DataPointer.empty())
+ newDatas();
+ datas[i] = DataPointer.top();
+ DataPointer.pop();
+ }
+ DPMutex.unlock();
+ }
+
+ void returnDatas(int n, T **datas) {
+ DPMutex.lock();
+ for (int i = 0; i < n; i++) {
+ DataPointer.push(datas[i]);
+ }
+ DPMutex.unlock();
+ }
+
+ DataPool() : DPMutex(), DataPointer(), total(0) {}
+
+  ~DataPool() {
+    // We assume all memory has been returned by the time the thread finishes
+    // and the destructor is called.
+ for (auto i : memory)
+ if (i)
+ free(i);
+ }
+};
+
+// This function returns the data to its originating DataPool.
+// A pointer to that DataPool is stored just before the actual data.
+template <typename T, int N> static void retData(void *data) {
+ ((DataPool<T, N> **)data)[-1]->returnData((T *)data);
+}
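+
+// Illustrative memory layout behind retData (see newDatas above): each pooled
+// element is laid out as
+//
+//   | DataPool<T, N> *dp | T data |
+//                          ^-- pointer handed out by getData()
+//
+// so ((DataPool<T, N> **)data)[-1] recovers the owning pool.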
+
+struct ParallelData;
+__thread DataPool<ParallelData, 4> *pdp;
+
+/// Data structure to store additional information for parallel regions.
+struct ParallelData {
+
+ // Parallel fork is just another barrier, use Barrier[1]
+
+ /// Two addresses for relationships with barriers.
+ ompt_tsan_clockid Barrier[2];
+
+ void *GetParallelPtr() { return &(Barrier[1]); }
+
+ void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }
+
+ ~ParallelData() {
+ TsanDeleteClock(&(Barrier[0]));
+ TsanDeleteClock(&(Barrier[1]));
+ }
+ // overload new/delete to use DataPool for memory management.
+ void *operator new(size_t size) { return pdp->getData(); }
+ void operator delete(void *p, size_t) { retData<ParallelData, 4>(p); }
+};
+
+static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
+ return reinterpret_cast<ParallelData *>(parallel_data->ptr);
+}
+
+struct Taskgroup;
+__thread DataPool<Taskgroup, 4> *tgp;
+
+/// Data structure to support stacking of taskgroups and allow synchronization.
+struct Taskgroup {
+ /// Its address is used for relationships of the taskgroup's task set.
+ ompt_tsan_clockid Ptr;
+
+ /// Reference to the parent taskgroup.
+ Taskgroup *Parent;
+
+ Taskgroup(Taskgroup *Parent) : Parent(Parent) {}
+ ~Taskgroup() { TsanDeleteClock(&Ptr); }
+
+ void *GetPtr() { return &Ptr; }
+ // overload new/delete to use DataPool for memory management.
+ void *operator new(size_t size) { return tgp->getData(); }
+ void operator delete(void *p, size_t) { retData<Taskgroup, 4>(p); }
+};
+
+struct TaskData;
+__thread DataPool<TaskData, 4> *tdp;
+
+/// Data structure to store additional information for tasks.
+struct TaskData {
+ /// Its address is used for relationships of this task.
+ ompt_tsan_clockid Task;
+
+ /// Child tasks use its address to declare a relationship to a taskwait in
+ /// this task.
+ ompt_tsan_clockid Taskwait;
+
+ /// Whether this task is currently executing a barrier.
+ bool InBarrier;
+
+ /// Whether this task is an included task.
+ bool Included;
+
+ /// Index of which barrier to use next.
+ char BarrierIndex;
+
+ /// Count how often this structure has been put into child tasks + 1.
+ std::atomic_int RefCount;
+
+ /// Reference to the parent that created this task.
+ TaskData *Parent;
+
+ /// Reference to the implicit task in the stack above this task.
+ TaskData *ImplicitTask;
+
+ /// Reference to the team of this task.
+ ParallelData *Team;
+
+ /// Reference to the current taskgroup that this task either belongs to or
+ /// that it just created.
+ Taskgroup *TaskGroup;
+
+ /// Dependency information for this task.
+ ompt_dependence_t *Dependencies;
+
+ /// Number of dependency entries.
+ unsigned DependencyCount;
+
+ void *PrivateData;
+ size_t PrivateDataSize;
+
+ int execution;
+ int freed;
+
+ TaskData(TaskData *Parent)
+ : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
+ Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team),
+ TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) {
+    // Note: the initializer list above already dereferences Parent for Team,
+    // so Parent is expected to be non-null here.
+    if (Parent != nullptr) {
+      Parent->RefCount++;
+      // Copy over the pointer to the taskgroup. This task may set up its own
+      // stack but for now belongs to its parent's taskgroup.
+      TaskGroup = Parent->TaskGroup;
+    }
+ }
+
+ TaskData(ParallelData *Team = nullptr)
+ : InBarrier(false), Included(false), BarrierIndex(0), RefCount(1),
+ Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr),
+ DependencyCount(0), execution(1), freed(0) {}
+
+ ~TaskData() {
+ TsanDeleteClock(&Task);
+ TsanDeleteClock(&Taskwait);
+ }
+
+ void *GetTaskPtr() { return &Task; }
+
+ void *GetTaskwaitPtr() { return &Taskwait; }
+ // overload new/delete to use DataPool for memory management.
+ void *operator new(size_t size) { return tdp->getData(); }
+ void operator delete(void *p, size_t) { retData<TaskData, 4>(p); }
+};
+
+static inline TaskData *ToTaskData(ompt_data_t *task_data) {
+ return reinterpret_cast<TaskData *>(task_data->ptr);
+}
+
+static inline void *ToInAddr(void *OutAddr) {
+  // FIXME: This will give false negatives when a second variable lies
+  // directly behind a variable that only has a width of 1 byte.
+  // Another approach would be to "negate" the address or to flip the
+  // first bit...
+ return reinterpret_cast<char *>(OutAddr) + 1;
+}
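+
+// Sketch of the dependency address scheme (illustrative): for a depend
+// variable at address p, out/inout dependences synchronize through p itself,
+// while in dependences synchronize through ToInAddr(p) == p + 1. An out
+// dependence therefore waits on both clocks (prior outs via p, prior ins via
+// p + 1), whereas concurrent in dependences only share p + 1 and remain
+// unordered with each other.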
+
+/// Store a mutex for each wait_id to resolve race conditions between
+/// callbacks.
+std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
+std::mutex LocksMutex;
+
+static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
+ ompt_data_t *thread_data) {
+ pdp = new DataPool<ParallelData, 4>;
+ TsanNewMemory(pdp, sizeof(pdp));
+ tgp = new DataPool<Taskgroup, 4>;
+ TsanNewMemory(tgp, sizeof(tgp));
+ tdp = new DataPool<TaskData, 4>;
+ TsanNewMemory(tdp, sizeof(tdp));
+ thread_data->value = my_next_id();
+}
+
+static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
+ delete pdp;
+ delete tgp;
+ delete tdp;
+}
+
+/// OMPT event callbacks for handling parallel regions.
+
+static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
+ const ompt_frame_t *parent_task_frame,
+ ompt_data_t *parallel_data,
+ uint32_t requested_team_size,
+ int flag,
+ const void *codeptr_ra) {
+ ParallelData *Data = new ParallelData;
+ parallel_data->ptr = Data;
+
+ TsanHappensBefore(Data->GetParallelPtr());
+}
+
+static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ int flag,
+ const void *codeptr_ra) {
+ ParallelData *Data = ToParallelData(parallel_data);
+ TsanHappensAfter(Data->GetBarrierPtr(0));
+ TsanHappensAfter(Data->GetBarrierPtr(1));
+
+ delete Data;
+
+#if (LLVM_VERSION >= 40)
+ if (&__archer_get_omp_status) {
+ if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
+ __tsan_flush_memory();
+ }
+#endif
+}
+
+static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ unsigned int team_size,
+ unsigned int thread_num,
+ int type) {
+ switch (endpoint) {
+ case ompt_scope_begin:
+ task_data->ptr = new TaskData(ToParallelData(parallel_data));
+ TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
+ break;
+ case ompt_scope_end:
+ TaskData *Data = ToTaskData(task_data);
+ assert(Data->freed == 0 && "Implicit task end should only be called once!");
+ Data->freed = 1;
+ assert(Data->RefCount == 1 &&
+ "All tasks should have finished at the implicit barrier!");
+ delete Data;
+ break;
+ }
+}
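+
+// Sketch of the fork arc (illustrative): ompt_tsan_parallel_begin annotates
+// TsanHappensBefore on the ParallelData's fork address (Barrier[1]) in the
+// master, and every implicit-task begin annotates TsanHappensAfter on the
+// same address, ordering code before the parallel region ahead of all
+// workers. The join direction is handled by the barrier clocks read in
+// ompt_tsan_parallel_end.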
+
+static void ompt_tsan_sync_region(ompt_sync_region_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra) {
+ TaskData *Data = ToTaskData(task_data);
+ switch (endpoint) {
+ case ompt_scope_begin:
+ switch (kind) {
+ case ompt_sync_region_barrier_implementation:
+ case ompt_sync_region_barrier_implicit:
+ case ompt_sync_region_barrier_explicit:
+ case ompt_sync_region_barrier: {
+ char BarrierIndex = Data->BarrierIndex;
+ TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));
+
+ if (hasReductionCallback < ompt_set_always) {
+        // We ignore writes inside the barrier. These would occur during
+        // 1. reductions performed by the runtime, which are guaranteed to be
+        //    race-free, or
+        // 2. execution of another task.
+        // For the latter case we re-enable tracking in task_schedule.
+ Data->InBarrier = true;
+ TsanIgnoreWritesBegin();
+ }
+
+ break;
+ }
+
+ case ompt_sync_region_taskwait:
+ break;
+
+ case ompt_sync_region_taskgroup:
+ Data->TaskGroup = new Taskgroup(Data->TaskGroup);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ case ompt_scope_end:
+ switch (kind) {
+ case ompt_sync_region_barrier_implementation:
+ case ompt_sync_region_barrier_implicit:
+ case ompt_sync_region_barrier_explicit:
+ case ompt_sync_region_barrier: {
+ if (hasReductionCallback < ompt_set_always) {
+ // We want to track writes after the barrier again.
+ Data->InBarrier = false;
+ TsanIgnoreWritesEnd();
+ }
+
+ char BarrierIndex = Data->BarrierIndex;
+ // Barrier will end after it has been entered by all threads.
+ if (parallel_data)
+ TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));
+
+ // It is not guaranteed that all threads have exited this barrier before
+ // we enter the next one. So we will use a different address.
+ // We are however guaranteed that this current barrier is finished
+ // by the time we exit the next one. So we can then reuse the first
+ // address.
+ Data->BarrierIndex = (BarrierIndex + 1) % 2;
+ break;
+ }
+
+ case ompt_sync_region_taskwait: {
+ if (Data->execution > 1)
+ TsanHappensAfter(Data->GetTaskwaitPtr());
+ break;
+ }
+
+ case ompt_sync_region_taskgroup: {
+ assert(Data->TaskGroup != nullptr &&
+ "Should have at least one taskgroup!");
+
+ TsanHappensAfter(Data->TaskGroup->GetPtr());
+
+      // Delete this allocated taskgroup; all descendant tasks are finished
+      // by now.
+ Taskgroup *Parent = Data->TaskGroup->Parent;
+ delete Data->TaskGroup;
+ Data->TaskGroup = Parent;
+ break;
+ }
+
+ default:
+ break;
+ }
+ break;
+ }
+}
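+
+// Illustrative timeline for the two-address barrier scheme above: with
+// BarrierIndex alternating 0, 1, 0, ..., barrier n synchronizes on
+// Barrier[n % 2]. Threads may still be leaving barrier n while others already
+// enter barrier n + 1, but all threads must have left barrier n before any
+// thread exits barrier n + 1, so its address can safely be reused for
+// barrier n + 2.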
+
+static void ompt_tsan_reduction(ompt_sync_region_t kind,
+ ompt_scope_endpoint_t endpoint,
+ ompt_data_t *parallel_data,
+ ompt_data_t *task_data,
+ const void *codeptr_ra) {
+ switch (endpoint) {
+ case ompt_scope_begin:
+ switch (kind) {
+ case ompt_sync_region_reduction:
+ TsanIgnoreWritesBegin();
+ break;
+ default:
+ break;
+ }
+ break;
+ case ompt_scope_end:
+ switch (kind) {
+ case ompt_sync_region_reduction:
+ TsanIgnoreWritesEnd();
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+}
+
+/// OMPT event callbacks for handling tasks.
+
+static void ompt_tsan_task_create(
+ ompt_data_t *parent_task_data, /* id of parent task */
+ const ompt_frame_t *parent_frame, /* frame data for parent task */
+ ompt_data_t *new_task_data, /* id of created task */
+ int type, int has_dependences,
+    const void *codeptr_ra) /* return address of the runtime call */
+{
+ TaskData *Data;
+ assert(new_task_data->ptr == NULL &&
+ "Task data should be initialized to NULL");
+ if (type & ompt_task_initial) {
+ ompt_data_t *parallel_data;
+ int team_size = 1;
+ ompt_get_parallel_info(0, &parallel_data, &team_size);
+ ParallelData *PData = new ParallelData;
+ parallel_data->ptr = PData;
+
+ Data = new TaskData(PData);
+ new_task_data->ptr = Data;
+ } else if (type & ompt_task_undeferred) {
+ Data = new TaskData(ToTaskData(parent_task_data));
+ new_task_data->ptr = Data;
+ Data->Included = true;
+ } else if (type & ompt_task_explicit || type & ompt_task_target) {
+ Data = new TaskData(ToTaskData(parent_task_data));
+ new_task_data->ptr = Data;
+
+ // Use the newly created address. We cannot use a single address from the
+ // parent because that would declare wrong relationships with other
+ // sibling tasks that may be created before this task is started!
+ TsanHappensBefore(Data->GetTaskPtr());
+ ToTaskData(parent_task_data)->execution++;
+ }
+}
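+
+// Illustrative ordering for explicit tasks (sketch): task_create annotates
+// TsanHappensBefore on the new task's own address, and the first
+// task_schedule to that task annotates TsanHappensAfter on it, so the
+// creating code is ordered before the task body without introducing false
+// ordering between sibling tasks.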
+
+static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
+ ompt_task_status_t prior_task_status,
+ ompt_data_t *second_task_data) {
+ TaskData *FromTask = ToTaskData(first_task_data);
+ TaskData *ToTask = ToTaskData(second_task_data);
+
+  if (ToTask->Included && prior_task_status != ompt_task_complete)
+    return; // No further synchronization at the begin of an included task.
+ if (FromTask->Included && prior_task_status == ompt_task_complete) {
+ // Just delete the task:
+ while (FromTask != nullptr && --FromTask->RefCount == 0) {
+ TaskData *Parent = FromTask->Parent;
+ if (FromTask->DependencyCount > 0) {
+ delete[] FromTask->Dependencies;
+ }
+ delete FromTask;
+ FromTask = Parent;
+ }
+ return;
+ }
+
+ if (ToTask->execution == 0) {
+ ToTask->execution++;
+ // 1. Task will begin execution after it has been created.
+ TsanHappensAfter(ToTask->GetTaskPtr());
+ for (unsigned i = 0; i < ToTask->DependencyCount; i++) {
+ ompt_dependence_t *Dependency = &ToTask->Dependencies[i];
+
+ TsanHappensAfter(Dependency->variable.ptr);
+      // out and inout dependencies are also blocked by prior in dependencies!
+      if (Dependency->dependence_type == ompt_dependence_type_out ||
+          Dependency->dependence_type == ompt_dependence_type_inout) {
+        TsanHappensAfter(ToInAddr(Dependency->variable.ptr));
+      }
+ }
+ } else {
+ // 2. Task will resume after it has been switched away.
+ TsanHappensAfter(ToTask->GetTaskPtr());
+ }
+
+ if (prior_task_status != ompt_task_complete) {
+ ToTask->ImplicitTask = FromTask->ImplicitTask;
+ assert(ToTask->ImplicitTask != NULL &&
+ "A task belongs to a team and has an implicit task on the stack");
+ }
+
+ // Task may be resumed at a later point in time.
+ TsanHappensBefore(FromTask->GetTaskPtr());
+
+ if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
+ // We want to ignore writes in the runtime code during barriers,
+ // but not when executing tasks with user code!
+ TsanIgnoreWritesEnd();
+ }
+
+ if (prior_task_status == ompt_task_complete) { // task finished
+
+ // Task will finish before a barrier in the surrounding parallel region ...
+ ParallelData *PData = FromTask->Team;
+ TsanHappensBefore(
+ PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));
+
+ // ... and before an eventual taskwait by the parent thread.
+ TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());
+
+ if (FromTask->TaskGroup != nullptr) {
+ // This task is part of a taskgroup, so it will finish before the
+ // corresponding taskgroup_end.
+ TsanHappensBefore(FromTask->TaskGroup->GetPtr());
+ }
+ for (unsigned i = 0; i < FromTask->DependencyCount; i++) {
+ ompt_dependence_t *Dependency = &FromTask->Dependencies[i];
+
+ // in dependencies block following inout and out dependencies!
+ TsanHappensBefore(ToInAddr(Dependency->variable.ptr));
+      if (Dependency->dependence_type == ompt_dependence_type_out ||
+          Dependency->dependence_type == ompt_dependence_type_inout) {
+ TsanHappensBefore(Dependency->variable.ptr);
+ }
+ }
+ while (FromTask != nullptr && --FromTask->RefCount == 0) {
+ TaskData *Parent = FromTask->Parent;
+ if (FromTask->DependencyCount > 0) {
+ delete[] FromTask->Dependencies;
+ }
+ delete FromTask;
+ FromTask = Parent;
+ }
+ }
+ if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
+ // We re-enter runtime code which currently performs a barrier.
+ TsanIgnoreWritesBegin();
+ }
+}
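+
+// Illustrative lifetime sketch: a TaskData is kept alive by RefCount (one
+// reference for the task itself plus one per child). On completion, the loop
+// above walks up the parent chain and frees every TaskData whose count drops
+// to zero, so a parent that already finished is reclaimed when its last
+// child completes.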
+
+static void ompt_tsan_dependences(ompt_data_t *task_data,
+ const ompt_dependence_t *deps,
+ int ndeps) {
+ if (ndeps > 0) {
+ // Copy the data to use it in task_switch and task_end.
+ TaskData *Data = ToTaskData(task_data);
+ Data->Dependencies = new ompt_dependence_t[ndeps];
+ std::memcpy(Data->Dependencies, deps,
+ sizeof(ompt_dependence_t) * ndeps);
+ Data->DependencyCount = ndeps;
+
+ // This callback is executed before this task is first started.
+ TsanHappensBefore(Data->GetTaskPtr());
+ }
+}
+
+/// OMPT event callbacks for handling locking.
+static void ompt_tsan_mutex_acquired(ompt_mutex_t kind,
+ ompt_wait_id_t wait_id,
+ const void *codeptr_ra) {
+
+ // Acquire our own lock to make sure that
+ // 1. the previous release has finished.
+ // 2. the next acquire doesn't start before we have finished our release.
+ LocksMutex.lock();
+ std::mutex &Lock = Locks[wait_id];
+ LocksMutex.unlock();
+
+ Lock.lock();
+ TsanHappensAfter(&Lock);
+}
+
+static void ompt_tsan_mutex_released(ompt_mutex_t kind,
+ ompt_wait_id_t wait_id,
+ const void *codeptr_ra) {
+ LocksMutex.lock();
+ std::mutex &Lock = Locks[wait_id];
+ LocksMutex.unlock();
+ TsanHappensBefore(&Lock);
+
+ Lock.unlock();
+}
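+
+// Illustrative sequence for an OpenMP lock with wait_id W (sketch): on
+// acquisition, the owning thread locks Locks[W] and annotates
+// TsanHappensAfter(&Locks[W]); on release it annotates
+// TsanHappensBefore(&Locks[W]) and unlocks Locks[W]. The shadow std::mutex
+// serializes the annotation pairs, so the happens-before arc always points
+// from a release to the next acquisition.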
+
+// callback, signature, variable to store result, required support level
+#define SET_OPTIONAL_CALLBACK_T(event, type, result, level) \
+ do { \
+ ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event; \
+ result = ompt_set_callback(ompt_callback_##event, \
+ (ompt_callback_t)tsan_##event); \
+ if (result < level) \
+ printf("Registered callback '" #event "' is not supported at " #level " (%i)\n", \
+ result); \
+ } while (0)
+
+#define SET_CALLBACK_T(event, type) \
+ do { \
+ int res; \
+ SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always); \
+ } while (0)
+
+#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
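+
+// For illustration, SET_CALLBACK(parallel_begin) expands (roughly) to:
+//
+//   ompt_callback_parallel_begin_t tsan_parallel_begin =
+//       &ompt_tsan_parallel_begin;
+//   res = ompt_set_callback(ompt_callback_parallel_begin,
+//                           (ompt_callback_t)tsan_parallel_begin);
+//
+// and prints a warning if the runtime cannot guarantee that the callback is
+// invoked at the required support level.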
+
+static int ompt_tsan_initialize(ompt_function_lookup_t lookup,
+ int device_num,
+ ompt_data_t *tool_data) {
+ const char *options = getenv("ARCHER_OPTIONS");
+ archer_flags = new ArcherFlags(options);
+
+ ompt_set_callback_t ompt_set_callback =
+ (ompt_set_callback_t)lookup("ompt_set_callback");
+ if (ompt_set_callback == NULL) {
+ std::cerr << "Could not set callback, exiting..." << std::endl;
+ std::exit(1);
+ }
+ ompt_get_parallel_info =
+ (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
+ ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+
+ if (ompt_get_parallel_info == NULL) {
+ fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
+ "exiting...\n");
+ exit(1);
+ }
+
+ SET_CALLBACK(thread_begin);
+ SET_CALLBACK(thread_end);
+ SET_CALLBACK(parallel_begin);
+ SET_CALLBACK(implicit_task);
+ SET_CALLBACK(sync_region);
+ SET_CALLBACK(parallel_end);
+
+ SET_CALLBACK(task_create);
+ SET_CALLBACK(task_schedule);
+ SET_CALLBACK(dependences);
+
+ SET_CALLBACK_T(mutex_acquired, mutex);
+ SET_CALLBACK_T(mutex_released, mutex);
+ SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback, ompt_set_never);
+ return 1; // success
+}
+
+static void ompt_tsan_finalize(ompt_data_t *tool_data) {
+ if (archer_flags->print_max_rss) {
+ struct rusage end;
+ getrusage(RUSAGE_SELF, &end);
+ printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
+ }
+
+  delete archer_flags;
+}
+
+extern "C"
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+ const char *runtime_version) {
+ const char *options = getenv("ARCHER_OPTIONS");
+ archer_flags = new ArcherFlags(options);
+  if (!archer_flags->enabled) {
+    if (archer_flags->verbose)
+      std::cout << "Archer disabled, stopping operation" << std::endl;
+ delete archer_flags;
+ return NULL;
+ }
+
+ static ompt_start_tool_result_t ompt_start_tool_result = {
+ &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
+  runOnTsan = 1;
+  RunningOnValgrind();
+  if (!runOnTsan) {
+    // We are not running under TSan; give a different tool the chance to be
+    // loaded.
+    if (archer_flags->verbose)
+      std::cout << "Archer detected OpenMP application without TSan, "
+                   "stopping operation"
+                << std::endl;
+ delete archer_flags;
+ return NULL;
+ }
+
+ if (archer_flags->verbose)
+ std::cout << "Archer detected OpenMP application with TSan, supplying "
+ "OpenMP synchronization semantics"
+ << std::endl;
+ return &ompt_start_tool_result;
+}