diff options
Diffstat (limited to 'hwpf/src')
-rw-r--r-- | hwpf/src/plat/plat_utils.C | 81 |
1 files changed, 78 insertions, 3 deletions
diff --git a/hwpf/src/plat/plat_utils.C b/hwpf/src/plat/plat_utils.C index 18aa4444..d056b85f 100644 --- a/hwpf/src/plat/plat_utils.C +++ b/hwpf/src/plat/plat_utils.C @@ -32,9 +32,12 @@ #ifndef __PPE__ #include <error_info.H> +#endif namespace fapi2 { + +#ifndef __PPE__ /// /// @brief Log an error. /// @@ -121,16 +124,90 @@ e FAPI_DBG("busCallouts: %lu", ei->iv_busCallouts.size()); io_rc.forgetData(); } +#endif /// /// @brief Delay this thread. /// - ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles) + ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles, bool i_fixed = false) { // void statements to keep the compiler from complaining // about unused variables. static_cast<void>(i_nanoSeconds); static_cast<void>(i_simCycles); + + +#ifndef __FAPI_DELAY_SIM__ + +#define PK_NANOSECONDS_SBE(n) ((PkInterval)((PK_BASE_FREQ_HZ * (PkInterval)(n)) / (1024*1024*1024))) + + PkTimebase target_time; + PkTimebase current_time; + PkMachineContext ctx; + + + // Only execute if nanoSeconds is non-zero (eg a real wait) + if (i_nanoSeconds) + { + // @todo For SBE applications, the time accuracy can be traded off + // for space with the PK_NANOSECONDS_SBE implemenation as the compiler + // use shift operations for the unit normalizing division. + + // The critical section enter/exit set is done to ensure the timebase + // operations are non-interrupible. + + pk_critical_section_enter(&ctx); + // + // The "accurate" version is the next line. + // target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS(i_nanoSeconds)); + + target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS_SBE(i_nanoSeconds)); + + do + { + current_time = pk_timebase32_get(); + } while (target_time > current_time); + + pk_critical_section_exit(&ctx); + + + } +#else + + // Execute a tight loop that simply counts down the i_simCycles + // value. + + // @todo This can might be optimized with a fused compare branch loop + // Note, though, that subwibnz instruction is optimized for word + // operations. i_simCycles are uint64_t values so the upper + // word values needs to be accounted for. + // + // Need to determine if this optimization is worth the effort. + +#ifndef __FAPI_DELAY_PPE_SIM_CYCLES__ +#define __FAPI_DELAY_PPE_SIM_CYCLES__ 8 +#endif + + static const uint8_t NUM_OVERHEAD_INSTRS = 15; + static const uint8_t NUM_LOOP_INSTRS = 4; + static const uint64_t MIN_DELAY_CYCLES = + ((NUM_OVERHEAD_INSTRS + NUM_LOOP_INSTRS) * __FAPI_DELAY_PPE_SIM_CYCLES__); + + uint64_t l_adjusted_simcycles; + + if (i_simCycles < MIN_DELAY_CYCLES) + l_adjusted_simcycles = MIN_DELAY_CYCLES; + else + l_adjusted_simcycles = i_simCycles; + + uint64_t delay_loop_count = + ((l_adjusted_simcycles - (NUM_OVERHEAD_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__)) / + (NUM_LOOP_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__)); + + + for (auto i = delay_loop_count; i > 0; --i) {} + +#endif // replace with platform specific implementation return FAPI2_RC_SUCCESS; @@ -158,8 +235,6 @@ revle16(uint16_t i_x) return rx; } -#endif - /// Byte-reverse a 32-bit integer if on a little-endian machine uint32_t |