diff options
author | Greg Still <stillgs@us.ibm.com> | 2015-10-14 06:41:37 -0500 |
---|---|---|
committer | Gregory S. Still <stillgs@us.ibm.com> | 2016-02-01 10:33:43 -0600 |
commit | 0f6e3e26ae2fe0df51b228b79c48823c7634334c (patch) | |
tree | 2838a57fce23e0330cda4d0ee1ad789a46b67913 /hwpf | |
parent | 369f1c55b12e568b4cd96d9c8bd131ef6ae9b964 (diff) | |
download | talos-sbe-0f6e3e26ae2fe0df51b228b79c48823c7634334c.tar.gz talos-sbe-0f6e3e26ae2fe0df51b228b79c48823c7634334c.zip |
PPE FAPI2 Delay support
- Addressed comments
- Updated simulation delay to account for PPE simulation cycle rate
- Fix simulation cycle equation to properly group the numerator
Change-Id: I97050f96fdbe726a9216070cd4892dfb266052cf
Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/21157
Tested-by: Jenkins Server
Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com>
Reviewed-by: Gregory S. Still <stillgs@us.ibm.com>
Diffstat (limited to 'hwpf')
-rw-r--r-- | hwpf/include/utils.H | 52 | ||||
-rw-r--r-- | hwpf/src/plat/plat_utils.C | 81 |
2 files changed, 122 insertions, 11 deletions
diff --git a/hwpf/include/utils.H b/hwpf/include/utils.H index 69dc5838..17635e18 100644 --- a/hwpf/include/utils.H +++ b/hwpf/include/utils.H @@ -70,14 +70,50 @@ uint64_t revle64(uint64_t i_x); namespace fapi2 { - /// @brief Delay this thread. - /// @param[in] i_nanoSeconds nanoseconds to sleep - /// @param[in] i_simCycles count of Awan cycles to advance - /// @return ReturnCode. Zero on success, else platform specified error. - inline ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles) - { - return FAPI2_RC_SUCCESS; - } +/// +/// @brief Delay this thread. Hostboot will use the nanoseconds parameter +/// and make a syscall to nanosleep. While in the syscall, the hostboot +/// kernel will continue to consume CPU cycles as it looks for a runnable +/// task. When the delay time expires, the task becomes runnable and will soon +/// return from the syscall. Callers of delay() in the hostboot environment +/// will likely have to know the MHz clock speed they are running on and +/// compute a non-zero value for i_nanoSeconds. +/// +/// On the FSP, it was sometimes acceptable to just provide zero for the +/// sleep delay time, causing the task to yield its time slice. By the +/// time the calling task could run again, it was pretty certain enough +/// host cycles had passed. This is probably not acceptable in +/// the hostboot environment. Callers should calculate and provide a +/// sleep value in nanoseconds relative to host clock speed. +/// +/// On FSP when VBU is the target, then the i_simCycles parameter will be +/// used instead. The FSP needs to use the simdispatcher client/server +/// API and issue a command to the awan to advance the simulation the +/// specified number of cycles. +/// +/// On SBE when __FAPI_DELAY_SIM__ is defined, then the i_simCycles parameter +/// will be used instead and will use the number passed. 
The build parameter +/// __FAPI_DELAY_PPE_SIM_CYCLES__ allows the delay to adjust for the number of +/// simulation cycles that the PPE engine is running at. The delay algorithm +/// takes the i_simCycles parameter, subtracts the loop overhead instructions +/// times __FAPI_DELAY_PPE_SIM_CYCLES__ and then divides the remainder by the +/// number of loop instructions times __FAPI_DELAY_PPE_SIM_CYCLES__. +/// +/// On SBE when __FAPI_DELAY_SIM__ is NOT defined, the nanoseconds parameter +/// will be used to loop on calls to the pk_timebase32_get() function to +/// determine the elapsed time. pk_sleep() is NOT used as there are no +/// other threads to dispatch. +/// +/// @param[in] i_nanoSeconds nanoseconds to sleep +/// @param[in] i_simCycles count of Awan cycles to advance +/// @param[in] i_fixed Determination, for DFT, if this time is +/// fixed or not. Defaults to non-fixed +/// +/// @return ReturnCode. Zero on success, else platform specified error. +/// +ReturnCode delay(uint64_t i_nanoSeconds, + uint64_t i_simCycles, + bool i_fixed = false); } #endif // FAPI2_UTILS_H_ diff --git a/hwpf/src/plat/plat_utils.C b/hwpf/src/plat/plat_utils.C index 18aa4444..d056b85f 100644 --- a/hwpf/src/plat/plat_utils.C +++ b/hwpf/src/plat/plat_utils.C @@ -32,9 +32,12 @@ #ifndef __PPE__ #include <error_info.H> +#endif namespace fapi2 { + +#ifndef __PPE__ /// /// @brief Log an error. /// @@ -121,16 +124,90 @@ e FAPI_DBG("busCallouts: %lu", ei->iv_busCallouts.size()); io_rc.forgetData(); } +#endif /// /// @brief Delay this thread. /// - ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles) + ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles, bool i_fixed = false) { // void statements to keep the compiler from complaining // about unused variables. 
static_cast<void>(i_nanoSeconds); static_cast<void>(i_simCycles); + + +#ifndef __FAPI_DELAY_SIM__ + +#define PK_NANOSECONDS_SBE(n) ((PkInterval)((PK_BASE_FREQ_HZ * (PkInterval)(n)) / (1024*1024*1024))) + + PkTimebase target_time; + PkTimebase current_time; + PkMachineContext ctx; + + + // Only execute if nanoSeconds is non-zero (e.g. a real wait) + if (i_nanoSeconds) + { + // @todo For SBE applications, the time accuracy can be traded off + // for space with the PK_NANOSECONDS_SBE implementation as the compiler + // uses shift operations for the unit normalizing division. + + // The critical section enter/exit set is done to ensure the timebase + // operations are non-interruptible. + + pk_critical_section_enter(&ctx); + // + // The "accurate" version is the next line. + // target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS(i_nanoSeconds)); + + target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS_SBE(i_nanoSeconds)); + + do + { + current_time = pk_timebase32_get(); + } while (target_time > current_time); + + pk_critical_section_exit(&ctx); + + + } +#else + + // Execute a tight loop that simply counts down the i_simCycles + // value. + + // @todo This might be optimized with a fused compare branch loop + // Note, though, that the subwibnz instruction is optimized for word + // operations. i_simCycles are uint64_t values so the upper + // word values need to be accounted for. + // + // Need to determine if this optimization is worth the effort. 
+ +#ifndef __FAPI_DELAY_PPE_SIM_CYCLES__ +#define __FAPI_DELAY_PPE_SIM_CYCLES__ 8 +#endif + + static const uint8_t NUM_OVERHEAD_INSTRS = 15; + static const uint8_t NUM_LOOP_INSTRS = 4; + static const uint64_t MIN_DELAY_CYCLES = + ((NUM_OVERHEAD_INSTRS + NUM_LOOP_INSTRS) * __FAPI_DELAY_PPE_SIM_CYCLES__); + + uint64_t l_adjusted_simcycles; + + if (i_simCycles < MIN_DELAY_CYCLES) + l_adjusted_simcycles = MIN_DELAY_CYCLES; + else + l_adjusted_simcycles = i_simCycles; + + uint64_t delay_loop_count = + ((l_adjusted_simcycles - (NUM_OVERHEAD_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__)) / + (NUM_LOOP_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__)); + + + for (auto i = delay_loop_count; i > 0; --i) {} + +#endif // replace with platform specific implementation return FAPI2_RC_SUCCESS; @@ -158,8 +235,6 @@ revle16(uint16_t i_x) return rx; } -#endif - /// Byte-reverse a 32-bit integer if on a little-endian machine uint32_t |