diff options
author | Jim Cownie <james.h.cownie@intel.com> | 2014-04-09 15:40:23 +0000 |
---|---|---|
committer | Jim Cownie <james.h.cownie@intel.com> | 2014-04-09 15:40:23 +0000 |
commit | 33f7b24d9f704afd80f768cf91f777a65f8f0718 (patch) | |
tree | 9e13f5de0d009a3469dd6a980bb60a6cb7fdc571 /openmp/offload/src/offload_target.cpp | |
parent | 30d56a7b860b9eed3447ae3d96cacd600b58856f (diff) | |
download | bcm5719-llvm-33f7b24d9f704afd80f768cf91f777a65f8f0718.tar.gz bcm5719-llvm-33f7b24d9f704afd80f768cf91f777a65f8f0718.zip |
Add the offload directory which contains the code needed to support
OpenMP 4.0 "target" directives. This will need more work for
generality, but we want to get it out and visible to the community.
llvm-svn: 205909
Diffstat (limited to 'openmp/offload/src/offload_target.cpp')
-rw-r--r-- | openmp/offload/src/offload_target.cpp | 754 |
1 files changed, 754 insertions, 0 deletions
diff --git a/openmp/offload/src/offload_target.cpp b/openmp/offload/src/offload_target.cpp new file mode 100644 index 00000000000..cfc1b0409df --- /dev/null +++ b/openmp/offload/src/offload_target.cpp @@ -0,0 +1,754 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + + +#include "offload_target.h" +#include <stdlib.h> +#include <unistd.h> +#ifdef SEP_SUPPORT +#include <fcntl.h> +#include <sys/ioctl.h> +#endif // SEP_SUPPORT +#include <omp.h> +#include <map> + +// typedef offload_func_with_parms. +// Pointer to function that represents an offloaded entry point. +// The parameters are a temporary fix for parameters on the stack. +typedef void (*offload_func_with_parms)(void *); + +// Target console and file logging +const char *prefix; +int console_enabled = 0; +int offload_report_level = 0; + +// Trace information +static const char* vardesc_direction_as_string[] = { + "NOCOPY", + "IN", + "OUT", + "INOUT" +}; +static const char* vardesc_type_as_string[] = { + "unknown", + "data", + "data_ptr", + "func_ptr", + "void_ptr", + "string_ptr", + "dv", + "dv_data", + "dv_data_slice", + "dv_ptr", + "dv_ptr_data", + "dv_ptr_data_slice", + "cean_var", + "cean_var_ptr", + "c_data_ptr_array" +}; + +int mic_index = -1; +int mic_engines_total = -1; +uint64_t mic_frequency = 0; +int offload_number = 0; +static std::map<void*, RefInfo*> ref_data; +static mutex_t add_ref_lock; + +#ifdef SEP_SUPPORT +static const char* sep_monitor_env = "SEP_MONITOR"; +static bool sep_monitor = false; +static const char* sep_device_env = "SEP_DEVICE"; +static const char* sep_device = "/dev/sep3.8/c"; +static int sep_counter = 0; + +#define SEP_API_IOC_MAGIC 99 +#define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31) +#define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32) + +static void add_ref_count(void * buf, bool created) +{ + mutex_locker_t locker(add_ref_lock); + RefInfo * info = ref_data[buf]; + + if (info) { + info->count++; + } + else { + info = new RefInfo((int)created,(long)1); + } + info->is_added |= created; + ref_data[buf] = info; +} + +static void BufReleaseRef(void * buf) +{ + mutex_locker_t locker(add_ref_lock); + RefInfo * info = ref_data[buf]; + + if (info) { + --info->count; + if (info->count == 0 && info->is_added) { + BufferReleaseRef(buf); + info->is_added = 0; + } + } +} + +static int VTPauseSampling(void) +{ + int ret = -1; + int handle = open(sep_device, O_RDWR); + if (handle > 0) { + ret = ioctl(handle, SEP_IOCTL_PAUSE); + close(handle); + } + return ret; +} + +static int VTResumeSampling(void) +{ + int ret = -1; + int handle = open(sep_device, O_RDWR); + if (handle > 0) { + ret = ioctl(handle, SEP_IOCTL_RESUME); + close(handle); + } + return ret; +} +#endif // SEP_SUPPORT + +void OffloadDescriptor::offload( + uint32_t buffer_count, + void** buffers, + void* misc_data, + uint16_t misc_data_len, + void* return_data, + uint16_t return_data_len +) +{ + FunctionDescriptor *func = (FunctionDescriptor*) misc_data; + const char *name = func->data; + OffloadDescriptor ofld; + char *in_data = 0; + char *out_data = 0; + char *timer_data = 0; + + console_enabled = func->console_enabled; + timer_enabled = func->timer_enabled; + offload_report_level = func->offload_report_level; + offload_number = func->offload_number; + ofld.set_offload_number(func->offload_number); + +#ifdef SEP_SUPPORT + if (sep_monitor) { + if (__sync_fetch_and_add(&sep_counter, 1) == 0) { + OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); + VTResumeSampling(); + } + } +#endif // SEP_SUPPORT + + OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), + c_offload_start_target_func, + "Offload \"%s\" started\n", name); + + // initialize timer data + OFFLOAD_TIMER_INIT(); + + OFFLOAD_TIMER_START(c_offload_target_total_time); + + OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); + + // get input/output buffer addresses + if (func->in_datalen > 0 || func->out_datalen > 0) { + if (func->data_offset != 0) { + in_data = (char*) misc_data + func->data_offset; + out_data = (char*) return_data; + } + else { + char *inout_buf = (char*) buffers[--buffer_count]; + in_data = inout_buf; + out_data = inout_buf; + } + } + + // assign variable descriptors + ofld.m_vars_total = func->vars_num; + if (ofld.m_vars_total > 0) { + uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); + + ofld.m_vars = (VarDesc*) malloc(var_data_len); + memcpy(ofld.m_vars, in_data, var_data_len); + + in_data += var_data_len; + func->in_datalen -= var_data_len; + } + + // timer data + if (func->timer_enabled) { + uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); + + timer_data = out_data; + out_data += timer_data_len; + func->out_datalen -= timer_data_len; + } + + // init Marshallers + ofld.m_in.init_buffer(in_data, func->in_datalen); + ofld.m_out.init_buffer(out_data, func->out_datalen); + + // copy buffers to offload descriptor + std::copy(buffers, buffers + buffer_count, + std::back_inserter(ofld.m_buffers)); + + OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); + + // find offload entry address + OFFLOAD_TIMER_START(c_offload_target_func_lookup); + + offload_func_with_parms entry = (offload_func_with_parms) + __offload_entries.find_addr(name); + + if (entry == NULL) { +#if OFFLOAD_DEBUG > 0 + if (console_enabled > 2) { + __offload_entries.dump(); + } +#endif + LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); + exit(1); + } + + OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); + + OFFLOAD_TIMER_START(c_offload_target_func_time); + + // execute offload entry + entry(&ofld); + + OFFLOAD_TIMER_STOP(c_offload_target_func_time); + + OFFLOAD_TIMER_STOP(c_offload_target_total_time); + + // copy timer data to the buffer + OFFLOAD_TIMER_TARGET_DATA(timer_data); + + OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); + +#ifdef SEP_SUPPORT + if (sep_monitor) { + if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { + OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); + VTPauseSampling(); + } + } +#endif // SEP_SUPPORT +} + +void OffloadDescriptor::merge_var_descs( + VarDesc *vars, + VarDesc2 *vars2, + int vars_total +) +{ + // number of variable descriptors received from host and generated + // locally should match + if (m_vars_total < vars_total) { + LIBOFFLOAD_ERROR(c_merge_var_descs1); + exit(1); + } + + for (int i = 0; i < m_vars_total; i++) { + if (i < vars_total) { + // variable type must match + if (m_vars[i].type.bits != vars[i].type.bits) { + LIBOFFLOAD_ERROR(c_merge_var_descs2); + exit(1); + } + + m_vars[i].ptr = vars[i].ptr; + m_vars[i].into = vars[i].into; + + const char *var_sname = ""; + if (vars2 != NULL) { + if (vars2[i].sname != NULL) { + var_sname = vars2[i].sname; + } + } + OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, + " VarDesc %d, var=%s, %s, %s\n", + i, var_sname, + vardesc_direction_as_string[m_vars[i].direction.bits], + vardesc_type_as_string[m_vars[i].type.src]); + if (vars2 != NULL && vars2[i].dname != NULL) { + OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, + vardesc_type_as_string[m_vars[i].type.dst]); + } + } + OFFLOAD_TRACE(2, + " type_src=%d, type_dstn=%d, direction=%d, " + "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " + "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", + m_vars[i].type.src, + m_vars[i].type.dst, + m_vars[i].direction.bits, + m_vars[i].alloc_if, + m_vars[i].free_if, + m_vars[i].align, + m_vars[i].mic_offset, + m_vars[i].flags.bits, + m_vars[i].offset, + m_vars[i].size, + m_vars[i].count, + m_vars[i].ptr, + m_vars[i].into); + } +} + +void OffloadDescriptor::scatter_copyin_data() +{ + OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); + + OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", + m_in.get_buffer_start(), + m_in.get_buffer_size()); + OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), + m_in.get_buffer_size()); + + // receive data + for (int i = 0; i < m_vars_total; i++) { + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + void** ptr_addr = src_is_for_mic ? + static_cast<void**>(m_vars[i].ptr) : + static_cast<void**>(m_vars[i].into); + int type = src_is_for_mic ? m_vars[i].type.src : + m_vars[i].type.dst; + bool is_static = src_is_for_mic ? + m_vars[i].flags.is_static : + m_vars[i].flags.is_static_dstn; + void *ptr = NULL; + + if (m_vars[i].flags.alloc_disp) { + int64_t offset = 0; + m_in.receive_data(&offset, sizeof(offset)); + m_vars[i].offset = -offset; + } + if (VAR_TYPE_IS_DV_DATA_SLICE(type) || + VAR_TYPE_IS_DV_DATA(type)) { + ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? + reinterpret_cast<ArrDesc*>(ptr_addr) : + *reinterpret_cast<ArrDesc**>(ptr_addr); + ptr_addr = reinterpret_cast<void**>(&dvp->Base); + } + + // Set pointer values + switch (type) { + case c_data_ptr_array: + { + int j = m_vars[i].ptr_arr_offset; + int max_el = j + m_vars[i].count; + char *dst_arr_ptr = (src_is_for_mic)? + *(reinterpret_cast<char**>(m_vars[i].ptr)) : + reinterpret_cast<char*>(m_vars[i].into); + + for (; j < max_el; j++) { + if (src_is_for_mic) { + m_vars[j].ptr = + dst_arr_ptr + m_vars[j].ptr_arr_offset; + } + else { + m_vars[j].into = + dst_arr_ptr + m_vars[j].ptr_arr_offset; + } + } + } + break; + case c_data: + case c_void_ptr: + case c_cean_var: + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].alloc_if) { + void *buf; + if (m_vars[i].flags.sink_addr) { + m_in.receive_data(&buf, sizeof(buf)); + } + else { + buf = m_buffers.front(); + m_buffers.pop_front(); + } + if (buf) { + if (!is_static) { + if (!m_vars[i].flags.sink_addr) { + // increment buffer reference + OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); + BufferAddRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); + } + add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); + } + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + + (m_vars[i].flags.is_stack_buf ? + 0 : m_vars[i].offset); + } + *ptr_addr = ptr; + } + else if (m_vars[i].flags.sink_addr) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + void *ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + + (m_vars[i].flags.is_stack_buf ? + 0 : m_vars[i].offset); + *ptr_addr = ptr; + } + break; + + case c_func_ptr: + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].alloc_if) { + void *buf; + if (m_vars[i].flags.sink_addr) { + m_in.receive_data(&buf, sizeof(buf)); + } + else { + buf = m_buffers.front(); + m_buffers.pop_front(); + } + if (buf) { + if (!is_static) { + if (!m_vars[i].flags.sink_addr) { + // increment buffer reference + OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); + BufferAddRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); + } + add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); + } + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + m_vars[i].offset; + } + *ptr_addr = ptr; + } + else if (m_vars[i].flags.sink_addr) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + ptr = static_cast<char*>(buf) + + m_vars[i].mic_offset + m_vars[i].offset; + *ptr_addr = ptr; + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, type); + abort(); + } + // Release obsolete buffers for stack of persistent objects + if (type = c_data_ptr && + m_vars[i].flags.is_stack_buf && + !m_vars[i].direction.bits && + m_vars[i].alloc_if && + m_vars[i].size != 0) { + for (int j=0; j < m_vars[i].size; j++) { + void *buf; + m_in.receive_data(&buf, sizeof(buf)); + BufferReleaseRef(buf); + ref_data.erase(buf); + } + } + // Do copyin + switch (m_vars[i].type.dst) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + if (m_vars[i].direction.in && + !m_vars[i].flags.is_static_dstn) { + int64_t size; + int64_t disp; + char* ptr = m_vars[i].into ? + static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr); + if (m_vars[i].type.dst == c_cean_var) { + m_in.receive_data((&size), sizeof(int64_t)); + m_in.receive_data((&disp), sizeof(int64_t)); + } + else { + size = m_vars[i].size; + disp = 0; + } + m_in.receive_data(ptr + disp, size); + } + break; + + case c_dv: + if (m_vars[i].direction.bits || + m_vars[i].alloc_if || + m_vars[i].free_if) { + char* ptr = m_vars[i].into ? + static_cast<char*>(m_vars[i].into) : + static_cast<char*>(m_vars[i].ptr); + m_in.receive_data(ptr + sizeof(uint64_t), + m_vars[i].size - sizeof(uint64_t)); + } + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + break; + + case c_func_ptr: + if (m_vars[i].direction.in) { + m_in.receive_func_ptr((const void**) m_vars[i].ptr); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + } + + OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", + m_in.get_tfr_size()); + + OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); + + OFFLOAD_TIMER_START(c_offload_target_compute); +} + +void OffloadDescriptor::gather_copyout_data() +{ + OFFLOAD_TIMER_STOP(c_offload_target_compute); + + OFFLOAD_TIMER_START(c_offload_target_gather_outputs); + + for (int i = 0; i < m_vars_total; i++) { + bool src_is_for_mic = (m_vars[i].direction.out || + m_vars[i].into == NULL); + + switch (m_vars[i].type.src) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + if (m_vars[i].direction.out && + !m_vars[i].flags.is_static) { + m_out.send_data( + static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, + m_vars[i].size); + } + break; + + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].free_if && + src_is_for_mic && + !m_vars[i].flags.is_static) { + void *buf = *static_cast<char**>(m_vars[i].ptr) - + m_vars[i].mic_offset - + (m_vars[i].flags.is_stack_buf? + 0 : m_vars[i].offset); + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); + } + break; + + case c_func_ptr: + if (m_vars[i].direction.out) { + m_out.send_func_ptr(*((void**) m_vars[i].ptr)); + } + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (src_is_for_mic && + m_vars[i].free_if && + !m_vars[i].flags.is_static) { + ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || + m_vars[i].type.src == c_dv_data_slice) ? + static_cast<ArrDesc*>(m_vars[i].ptr) : + *static_cast<ArrDesc**>(m_vars[i].ptr); + + void *buf = reinterpret_cast<char*>(dvp->Base) - + m_vars[i].mic_offset - + m_vars[i].offset; + + if (buf == NULL) { + break; + } + + // decrement buffer reference count + OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + + if (m_vars[i].into) { + switch (m_vars[i].type.dst) { + case c_data_ptr_array: + break; + case c_data: + case c_void_ptr: + case c_cean_var: + case c_dv: + break; + + case c_string_ptr: + case c_data_ptr: + case c_cean_var_ptr: + case c_dv_ptr: + if (m_vars[i].direction.in && + m_vars[i].free_if && + !m_vars[i].flags.is_static_dstn) { + void *buf = *static_cast<char**>(m_vars[i].into) - + m_vars[i].mic_offset - + (m_vars[i].flags.is_stack_buf? + 0 : m_vars[i].offset); + + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START( + c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP( + c_offload_target_release_buffer_refs); + } + break; + + case c_func_ptr: + break; + + case c_dv_data: + case c_dv_ptr_data: + case c_dv_data_slice: + case c_dv_ptr_data_slice: + if (m_vars[i].free_if && + m_vars[i].direction.in && + !m_vars[i].flags.is_static_dstn) { + ArrDesc *dvp = + (m_vars[i].type.dst == c_dv_data_slice || + m_vars[i].type.dst == c_dv_data) ? + static_cast<ArrDesc*>(m_vars[i].into) : + *static_cast<ArrDesc**>(m_vars[i].into); + void *buf = reinterpret_cast<char*>(dvp->Base) - + m_vars[i].mic_offset - + m_vars[i].offset; + + if (buf == NULL) { + break; + } + // decrement buffer reference count + OFFLOAD_TIMER_START( + c_offload_target_release_buffer_refs); + BufReleaseRef(buf); + OFFLOAD_TIMER_STOP( + c_offload_target_release_buffer_refs); + } + break; + + default: + LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); + abort(); + } + } + } + + OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", + m_out.get_buffer_start(), + m_out.get_buffer_size()); + + OFFLOAD_DEBUG_DUMP_BYTES(2, + m_out.get_buffer_start(), + m_out.get_buffer_size()); + + OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, + "Total copyout data sent to host: [%lld] bytes\n", + m_out.get_tfr_size()); + + OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); +} + +void __offload_target_init(void) +{ +#ifdef SEP_SUPPORT + const char* env_var = getenv(sep_monitor_env); + if (env_var != 0 && *env_var != '\0') { + sep_monitor = atoi(env_var); + } + env_var = getenv(sep_device_env); + if (env_var != 0 && *env_var != '\0') { + sep_device = env_var; + } +#endif // SEP_SUPPORT + + prefix = report_get_message_str(c_report_mic); + + // init frequency + mic_frequency = COIPerfGetCycleFrequency(); +} + +// User-visible offload API + +int _Offload_number_of_devices(void) +{ + return mic_engines_total; +} + +int _Offload_get_device_number(void) +{ + return mic_index; +} + +int _Offload_get_physical_device_number(void) +{ + uint32_t index; + EngineGetIndex(&index); + return index; +} |