summaryrefslogtreecommitdiffstats
path: root/hwpf
diff options
context:
space:
mode:
authorGreg Still <stillgs@us.ibm.com>2015-10-14 06:41:37 -0500
committerGregory S. Still <stillgs@us.ibm.com>2016-02-01 10:33:43 -0600
commit0f6e3e26ae2fe0df51b228b79c48823c7634334c (patch)
tree2838a57fce23e0330cda4d0ee1ad789a46b67913 /hwpf
parent369f1c55b12e568b4cd96d9c8bd131ef6ae9b964 (diff)
downloadtalos-sbe-0f6e3e26ae2fe0df51b228b79c48823c7634334c.tar.gz
talos-sbe-0f6e3e26ae2fe0df51b228b79c48823c7634334c.zip
PPE FAPI2 Delay support
- Addressed comments - Updated simulation delay to account for PPE simulation cycle rate - Fix simulation cycle equation to properly group the numerator Change-Id: I97050f96fdbe726a9216070cd4892dfb266052cf Reviewed-on: http://gfw160.aus.stglabs.ibm.com:8080/gerrit/21157 Tested-by: Jenkins Server Reviewed-by: Sachin Gupta <sgupta2m@in.ibm.com> Reviewed-by: Gregory S. Still <stillgs@us.ibm.com>
Diffstat (limited to 'hwpf')
-rw-r--r--hwpf/include/utils.H52
-rw-r--r--hwpf/src/plat/plat_utils.C81
2 files changed, 122 insertions, 11 deletions
diff --git a/hwpf/include/utils.H b/hwpf/include/utils.H
index 69dc5838..17635e18 100644
--- a/hwpf/include/utils.H
+++ b/hwpf/include/utils.H
@@ -70,14 +70,50 @@ uint64_t revle64(uint64_t i_x);
namespace fapi2
{
- /// @brief Delay this thread.
- /// @param[in] i_nanoSeconds nanoseconds to sleep
- /// @param[in] i_simCycles count of Awan cycles to advance
- /// @return ReturnCode. Zero on success, else platform specified error.
- inline ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles)
- {
- return FAPI2_RC_SUCCESS;
- }
+///
+/// @brief Delay this thread. Hostboot will use the nanoseconds parameter
+/// and make a syscall to nanosleep. While in the syscall, the hostboot
+/// kernel will continue to consume CPU cycles as it looks for a runnable
+/// task. When the delay time expires, the task becomes runnable and will soon
+/// return from the syscall. Callers of delay() in the hostboot environment
+/// will likely have to know the MHz clock speed they are running on and
+/// compute a non-zero value for i_nanoSeconds.
+///
+/// On the FSP, it was sometimes acceptable to just provide zero for the
+/// sleep delay time, causing the task to yield its time slice. By the
+/// time the calling task could run again, it was pretty certain enough
+/// host cycles had passed. This is probably not acceptable in
+/// the hostboot environment. Callers should calculate and provide a
+/// sleep value in nanoseconds relative to host clock speed.
+///
+/// On FSP when VBU is the target, then the i_simCycles parameter will be
+/// used instead. The FSP needs to use the simdispatcher client/server
+/// API and issue a command to the awan to advance the simulation the
+/// specified number of cycles.
+///
+/// On SBE when __FAPI_DELAY_SIM__ is defined, then the i_simCycles parameter
+/// will be used instead and will use the number passed. The build parameter
+/// __FAPI_DELAY_PPE_SIM_CYCLES__ allows the delay to adjust for the number of
+/// simulation cycles that the PPE engine is running at. The delay algorithm
+/// takes the i_simCycles parameter, subtracts the loop overhead instructions
+/// times __FAPI_DELAY_PPE_SIM_CYCLES__ and then divides the remainder by the
+/// number of loop instructions times __FAPI_DELAY_PPE_SIM_CYCLES__.
+///
+/// On SBE when __FAPI_DELAY_SIM__ is NOT defined, the nanoseconds parameter
+/// will be used to loop on calls to the pk_timebase32_get() function to
+/// determine the elapsed time. pk_sleep() is NOT used as there are no
+/// other threads to dispatch.
+///
+/// @param[in] i_nanoSeconds nanoseconds to sleep
+/// @param[in] i_simCycles count of Awan cycles to advance
+/// @param[in] i_fixed Determination, for DFT, if this time is
+/// fixed or not. Defaults to non-fixed
+///
+/// @return ReturnCode. Zero on success, else platform specified error.
+///
+ReturnCode delay(uint64_t i_nanoSeconds,
+ uint64_t i_simCycles,
+ bool i_fixed = false);
}
#endif // FAPI2_UTILS_H_
diff --git a/hwpf/src/plat/plat_utils.C b/hwpf/src/plat/plat_utils.C
index 18aa4444..d056b85f 100644
--- a/hwpf/src/plat/plat_utils.C
+++ b/hwpf/src/plat/plat_utils.C
@@ -32,9 +32,12 @@
#ifndef __PPE__
#include <error_info.H>
+#endif
namespace fapi2
{
+
+#ifndef __PPE__
///
/// @brief Log an error.
///
@@ -121,16 +124,90 @@ e FAPI_DBG("busCallouts: %lu", ei->iv_busCallouts.size());
io_rc.forgetData();
}
+#endif
///
/// @brief Delay this thread.
///
- ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles)
+ ReturnCode delay(uint64_t i_nanoSeconds, uint64_t i_simCycles, bool i_fixed = false)
{
// void statements to keep the compiler from complaining
// about unused variables.
static_cast<void>(i_nanoSeconds);
static_cast<void>(i_simCycles);
+
+
+#ifndef __FAPI_DELAY_SIM__
+
+#define PK_NANOSECONDS_SBE(n) ((PkInterval)((PK_BASE_FREQ_HZ * (PkInterval)(n)) / (1024*1024*1024)))
+
+ PkTimebase target_time;
+ PkTimebase current_time;
+ PkMachineContext ctx;
+
+
+ // Only execute if nanoSeconds is non-zero (eg a real wait)
+ if (i_nanoSeconds)
+ {
+ // @todo For SBE applications, the time accuracy can be traded off
+ // for space with the PK_NANOSECONDS_SBE implementation, as the compiler
+ // can use shift operations for the unit-normalizing division.
+
+ // The critical section enter/exit set is done to ensure the timebase
+ // operations are non-interruptible.
+
+ pk_critical_section_enter(&ctx);
+ //
+ // The "accurate" version is the next line.
+ // target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS(i_nanoSeconds));
+
+ target_time = pk_timebase32_get() + PK_INTERVAL_SCALE(PK_NANOSECONDS_SBE(i_nanoSeconds));
+
+ do
+ {
+ current_time = pk_timebase32_get();
+ } while (target_time > current_time);
+
+ pk_critical_section_exit(&ctx);
+
+
+ }
+#else
+
+ // Execute a tight loop that simply counts down the i_simCycles
+ // value.
+
+ // @todo This might be optimized with a fused compare branch loop
+ // Note, though, that subwibnz instruction is optimized for word
+ // operations. i_simCycles are uint64_t values so the upper
+ // word values needs to be accounted for.
+ //
+ // Need to determine if this optimization is worth the effort.
+
+#ifndef __FAPI_DELAY_PPE_SIM_CYCLES__
+#define __FAPI_DELAY_PPE_SIM_CYCLES__ 8
+#endif
+
+ static const uint8_t NUM_OVERHEAD_INSTRS = 15;
+ static const uint8_t NUM_LOOP_INSTRS = 4;
+ static const uint64_t MIN_DELAY_CYCLES =
+ ((NUM_OVERHEAD_INSTRS + NUM_LOOP_INSTRS) * __FAPI_DELAY_PPE_SIM_CYCLES__);
+
+ uint64_t l_adjusted_simcycles;
+
+ if (i_simCycles < MIN_DELAY_CYCLES)
+ l_adjusted_simcycles = MIN_DELAY_CYCLES;
+ else
+ l_adjusted_simcycles = i_simCycles;
+
+ uint64_t delay_loop_count =
+ ((l_adjusted_simcycles - (NUM_OVERHEAD_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__)) /
+ (NUM_LOOP_INSTRS * __FAPI_DELAY_PPE_SIM_CYCLES__));
+
+
+ for (auto i = delay_loop_count; i > 0; --i) {}
+
+#endif
// replace with platform specific implementation
return FAPI2_RC_SUCCESS;
@@ -158,8 +235,6 @@ revle16(uint16_t i_x)
return rx;
}
-#endif
-
/// Byte-reverse a 32-bit integer if on a little-endian machine
uint32_t
OpenPOWER on IntegriCloud