summaryrefslogtreecommitdiffstats
path: root/src/lib/gpe_data.pS
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/gpe_data.pS')
-rwxr-xr-xsrc/lib/gpe_data.pS1585
1 files changed, 1585 insertions, 0 deletions
diff --git a/src/lib/gpe_data.pS b/src/lib/gpe_data.pS
new file mode 100755
index 0000000..2338276
--- /dev/null
+++ b/src/lib/gpe_data.pS
@@ -0,0 +1,1585 @@
+// $Id: gpe_data.pS,v 820.1 2014/08/22 16:33:56 daviddu Exp $
+// $Source: /afs/awd/projects/eclipz/KnowledgeBase/.cvsroot/eclipz/chips/p8/fw820/procedures/lib/gpe_data.pS,v $
+//-----------------------------------------------------------------------------
+// *! (C) Copyright International Business Machines Corp. 2013
+// *! All Rights Reserved -- Property of IBM
+// *! *** IBM Confidential ***
+//-----------------------------------------------------------------------------
+
+/// \file gpe_data.pS
+/// \brief GPE procedures for raw data collection
+
+ .nolist
+
+#include "ssx.h"
+#include "pgas.h"
+#include "pgp_config.h"
+#include "gpe.h"
+#include "gpe_pba.h"
+#include "gpe_data.h"
+#include "gpe_scom.h"
+
+ .list
+
+ .oci
+ .text.pore
+
+ .revision_string G_gpe_data_pS_revision, "$Revision: 820.1 $"
+
+/// \cond
+
+////////////////////////////////////////////////////////////////////////////
+// Common Macros
+////////////////////////////////////////////////////////////////////////////
+
+ // get_scom: read one full 64-bit SCOM and write it to OCI space.
+ //
+ // dx           : Data register used as the transfer buffer (clobbered)
+ // scom         : SCOM address, resolved relative to chiplet_base
+ // chiplet_base : Register selecting the chiplet (callers in this file
+ //                pass P0)
+ // oci_offset   : Offset of the destination doubleword from oci_base
+ // oci_base     : Register holding the OCI base address (callers in this
+ //                file pass A0)
+
+ .macro get_scom, dx, scom, chiplet_base, oci_offset, oci_base
+
+ ld (\dx), (\scom), (\chiplet_base)
+ std (\dx), (\oci_offset), (\oci_base)
+
+ .endm
+
+
+ // tag_data_group: tag a data group with a 32-bit slice of the TOD.
+ //
+ // The extrdi below extracts 32 bits starting at bit 24, i.e. TOD[24..55]
+ // (PowerPC-style big-endian bit numbering), right-justified in the
+ // doubleword that is stored.
+ //
+ // base        : Offset of the tag doubleword from oci_base
+ // dx          : Data register used to build the tag (clobbered)
+ // oci_base    : Register holding the OCI base address of the group
+ // tod_chiplet : Register selecting the chiplet that owns TOD_VALUE_REG
+
+ .macro tag_data_group, base, dx, oci_base, tod_chiplet
+
+ ld (\dx), TOD_VALUE_REG, (\tod_chiplet)
+ extrdi (\dx), (\dx), 32, 24
+ std (\dx), (\base), (\oci_base)
+
+ .endm
+
+
+ // ocicopy: copy one 64-bit doubleword from one OCI location to another.
+ //
+ // dx         : Data register used as the transfer buffer (clobbered)
+ // src_offset : Offset of the source doubleword from src_base
+ // src_base   : Register holding the source base address
+ // dst_offset : Offset of the destination doubleword from dst_base
+ // dst_base   : Register holding the destination base address
+
+ .macro ocicopy, dx, src_offset, src_base, dst_offset, dst_base
+
+ ld (\dx), (\src_offset), (\src_base)
+ std (\dx), (\dst_offset), (\dst_base)
+
+ .endm
+
+
+////////////////////////////////////////////////////////////////////////////
+// gpe_get_core_data()
+////////////////////////////////////////////////////////////////////////////
+
+ // Macros for gpe_get_core_data().
+
+ // Tag a core data group with TOD[24..55] (32 bits extracted starting at
+ // bit 24), and optionally with the raw cycle count.
+ //
+ // Tag layout: the TOD slice occupies the low-order 32 bits. When raw=1
+ // the value read from PC_OCC_SPRD is rotated into the high-order 32 bits
+ // and OR-ed in; this relies on the PC SPRD read returning zero in its
+ // own high-order 32 bits.
+ //
+ // base        : Offset of the tag doubleword from oci_base (store=1 only)
+ // oci_base    : Register holding the OCI base address (store=1 only)
+ // pc_chiplet  : Register selecting the core chiplet (raw=1 only)
+ // tod_chiplet : Register selecting the chiplet that owns TOD_VALUE_REG
+ // raw         : Non-zero (default) to add the raw cycle count
+ // store       : Non-zero (default) to store the tag to OCI space
+ //
+ // D0 is always clobbered and always holds the finished tag on exit,
+ // whether or not it is stored. D1 is clobbered only when raw=1; raw=1
+ // also rewrites PC_OCC_SPRC of the core.
+
+ .macro tag_core_data_group, base, oci_base, pc_chiplet, tod_chiplet, \
+ raw=1, store=1
+
+ ld D0, TOD_VALUE_REG, (\tod_chiplet)
+ extrdi D0, D0, 32, 24
+ .if (\raw)
+ sti PC_OCC_SPRC, (\pc_chiplet), SPRN_CORE_RAW_CYCLE
+ ld D1, PC_OCC_SPRD, (\pc_chiplet)
+ rotldi D1, D1, 32
+ or D0, D0, D1
+ .endif
+ .if (\store)
+ std D0, (\base), (\oci_base)
+ .endif
+ .endm
+
+
+ // Get a pair of SCOMs from PC, packing them into a single 64-bit value
+ // and writing them to OCI space. Clobbers D0 and D1. Assumes that
+ // PC_OCC_SPRC is set up for autoincrement access as well, so that the
+ // two back-to-back reads of PC_OCC_SPRD return consecutive counters.
+ //
+ // This macro takes advantage of the fact that PC-unit SCOMs only
+ // define the lower 32 bits, and the high-32 are 0.
+ //
+ // The first value read lands in the high-order 32 bits of the stored
+ // doubleword, the second in the low-order 32 bits.
+ //
+ // offset       : Offset of the destination doubleword from oci_base
+ // oci_base     : Register holding the OCI base address
+ // chiplet_base : Register selecting the core chiplet
+
+ .macro get_pc_pair, offset, oci_base, chiplet_base
+
+ ld D0, PC_OCC_SPRD, (\chiplet_base)
+ ld D1, PC_OCC_SPRD, (\chiplet_base)
+ rotldi D0, D0, 32
+ or D0, D0, D1
+ std D0, (\offset), (\oci_base)
+
+ .endm
+/// \endcond
+
+
+
+
+/// \fn gpe_get_core_data(GpeGetCoreDataParms *parms);
+/// \brief Get core chiplet raw data on performance/thermal timescale
+///
+/// This routine uses get_per_core_raw_data() to collect raw data for one or
+/// more cores. The \a data field of the GpeGetCoreDataParms parameter
+/// points to the data area for the first core index; the area for each
+/// subsequent core index follows at CORE_DATA_SIZE-byte intervals (a slot is
+/// stepped over even when that core is not configured). Data for every core
+/// configured in the configuration mask is collected - it is assumed that the
+/// data areas exist.
+///
+/// This entry point is used by the lab thread 'coreData'.
+#ifdef DOXYGEN_ONLY
+void gpe_get_core_data(GpeGetCoreDataParms *parms);
+#endif
+/// \cond
+
+ // Register usage:
+ //
+ // ETR : At entry, holds the parameter pointer.
+ // A1 : Holds the pointer to the parameters
+ // A0 : Holds the (varying) pointer to the data area for the
+ // current core.
+ // P1 : Holds the (constant) chiplet id of the TOD
+ // P0 : Holds the (varying) chiplet id of the current core
+ // SPRG0 : Temporary storage of the chiplet mask as it rotates.
+ // CTR : Loops through core chiplets indices
+ // D1 : Scratch
+ // D0 : Scratch
+
+ .global gpe_get_core_data
+
+gpe_get_core_data:
+
+ // Set up registers. The chiplet part of the ChipConfig is left
+ // justified then stored in SPRG0, where it will be maintained as we
+ // rotate through it. Note that SPRG0 is 32 bits, so it needs to be
+ // manipulated from the low-order portion of a data register.
+
+ // The parameter pointer is also saved in the global core_data_parms
+ // because get_per_core_raw_data() reuses A1 and reloads the pointer
+ // from there.
+
+ mr D0, ETR
+ la A1, core_data_parms
+ std D0, 0, A1
+ mr A1, D0
+
+ // A0 <- parms->data
+
+ ld D0, GPEGETCOREDATAPARMS_DATA, A1
+ mr A0, D0
+
+ // SPRG0 <- left-justified core configuration mask, moved to the
+ // low-order 32 bits of D0 first.
+
+ ld D0, GPEGETCOREDATAPARMS_CONFIG, A1
+ left_justify_core_config D0
+ rotldi D0, D0, 32
+ mr SPRG0, D0
+
+ // P0 starts at 0x10, the chiplet id used for the first core index.
+
+ lpcs P1, TOD_VALUE_REG
+ ls P0, 0x10
+ ls CTR, (PGP_NCORES - 1) # PORE does test, then decr. and branch
+
+core_data_loop:
+
+ // Load/test the chiplet mask, and store the rotated mask back to
+ // SPRG0. If the chiplet is not configured, simply continue.
+
+ mr D0, SPRG0
+ andi D1, D0, 0x80000000
+ rotldi D0, D0, 1
+ mr SPRG0, D0
+ braz D1, core_data_continue
+
+ // Collect Raw Data for Core specified by P0, stored at A0
+
+ bsr get_per_core_raw_data
+
+core_data_continue:
+
+ // Increment the core chiplet index and data pointer, then loop or
+ // halt. The data pointer advances for unconfigured cores as well, so
+ // each core index owns a fixed CORE_DATA_SIZE slot.
+
+ adds P0, P0, 1
+ adds A0, A0, CORE_DATA_SIZE
+ loop core_data_loop
+
+ halt
+
+ .epilogue gpe_get_core_data
+
+/// \endcond
+
+
+
+/// \fn gpe_get_per_core_data(GpeGetCoreDataParms *parms);
+/// \brief Get core chiplet raw data for a single core
+///
+/// This routine uses get_per_core_raw_data() to collect raw data for a single
+/// core. Regardless of the configuration mask setting, this routine exits
+/// after collecting data for a single core. The \a data field of the
+/// GpeGetCoreDataParms contains a pointer to a single CoreData object.
+///
+/// This entry point is used by OCC product firmware.
+#ifdef DOXYGEN_ONLY
+void gpe_get_per_core_data(GpeGetCoreDataParms *parms);
+#endif
+/// \cond
+
+ // Register usage:
+ //
+ // ETR : At entry, holds the parameter pointer.
+ // A1 : Holds the pointer to the parameters
+ // A0 : Holds the (constant) pointer to the single data area that
+ // receives the data of the first configured core.
+ // P1 : Holds the (constant) chiplet id of the TOD
+ // P0 : Holds the (varying) chiplet id of the current core
+ // SPRG0 : Temporary storage of the chiplet mask as it rotates.
+ // CTR : Loops through core chiplets indices
+ // D1 : Scratch
+ // D0 : Scratch
+
+ .global gpe_get_per_core_data
+
+gpe_get_per_core_data:
+
+ // Set up registers. A1 gets the parameter pointer from the ETR (the
+ // pointer must also be stored in memory, in core_data_parms, because
+ // get_per_core_raw_data() reloads it from there), and A0 gets the
+ // data pointer. The chiplet part of the ChipConfig is left
+ // justified then stored in SPRG0, where it will be maintained as we
+ // rotate through it. Note that SPRG0 is 32 bits, so it needs to be
+ // manipulated from the low-order portion of a data register.
+
+ mr D0, ETR
+ la A1, core_data_parms
+ std D0, 0, A1
+ mr A1, D0
+
+ ld D0, GPEGETCOREDATAPARMS_DATA, A1
+ mr A0, D0
+
+ ld D0, GPEGETCOREDATAPARMS_CONFIG, A1
+ left_justify_core_config D0
+ rotldi D0, D0, 32
+ mr SPRG0, D0
+
+ lpcs P1, TOD_VALUE_REG
+ ls P0, 0x10
+ ls CTR, (PGP_NCORES - 1) # PORE does test, then decr. and branch
+
+per_core_data_loop:
+
+ // Load/test the chiplet mask, and store the rotated mask back to
+ // SPRG0. If the chiplet is not configured, simply continue.
+
+ mr D0, SPRG0
+ andi D1, D0, 0x80000000
+ rotldi D0, D0, 1
+ mr SPRG0, D0
+ braz D1, per_core_data_continue
+
+ // Collect Raw Data for Core specified by P0, stored at A0
+
+ bsr get_per_core_raw_data
+
+ // Exit GPE after gathering data for one core
+ bra per_core_data_complete
+
+per_core_data_continue:
+
+ // Increment the core chiplet index only (the data pointer in A0 is
+ // not advanced, as there is a single data area), then loop or halt.
+ // If no core is configured the loop runs out and nothing is written.
+ adds P0, P0, 1
+ loop per_core_data_loop
+
+per_core_data_complete:
+ halt
+
+ .epilogue gpe_get_per_core_data
+
+/// \endcond
+
+/// \fn get_per_core_raw_data();
+/// \brief Get core chiplet raw data for one core
+///
+/// This routine collects raw data from the core designated by P0. Data is
+/// grouped into logical groups, and the collection of any group is enabled by
+/// a group select mask. Every group is tagged with the TOD sampled
+/// immediately before the group data are sampled. The memory, throttle and
+/// thread groups are additionally tagged with the raw cycle count when the
+/// core is accessible; the EMPATH, DTS/CPM and PCB Slave groups, and all
+/// groups on the bypass path, carry the TOD only.
+///
+/// The final PCB Slave data group should always be selected (but \e is
+/// configurable) as it contains the PCB Slave Power Management history
+/// register. This register value is required to determine how to interpret
+/// the other data items.
+///
+/// The PC counters are collected using the SPRC/SPRD autoincrement
+/// mechanism. Be very cautious about changing this code or the data layout
+/// because the counter order is fixed by hardware and the data layout
+/// reflects the most natural way to collect the data based on the
+/// hardware. Note that SPRC/SPRD autoincrement IS NOT OPTIONAL for the OCC
+/// registers, regardless of how it may be documented in the PC workbook, or
+/// the fact that the procedure redundantly sets up auto-increment. That is,
+/// the hardware always does auto-increment for these SPRC/SPRD reads.
+///
+/// The data structure includes a TOD/Raw cycles word for each set of counters
+/// for each thread. Due to the amount of time it may take to collect
+/// per-thread data for 8 threads, errors of 1% or more could accrue at thread
+/// 7 if each thread group were not individually tagged. To avoid having to
+/// SCOM the TOD plus a SPRC/SPRD pair to create each thread group header
+/// however, we instead tag thread0 with actual data, then tag the remaining
+/// thread groups with interpolated TOD/Raw cycle values computed by obtaining
+/// a tag at the end of all threads. This takes only a little more time than
+/// the simpler expedient of copying the Tod/Raw Cycles count from thread0 to
+/// threads 1-7.
+///
+/// At the entry point of the routine, the code must go through the PC-ONLY
+/// special wakeup procedure to ensure that we can SCOM a napping core. This
+/// has to be done carefully as it's possible that SCOM access to the OHA will
+/// result in a 0x1 PIB response if the core is coming out of deep
+/// sleep/winkle. This PIB response would discombobulate the PORE engine so we
+/// have to run these SCOMs with error handling done manually. If a core is
+/// inaccessible due to an idle state we clear all of the configured EMPATH
+/// counts, per-thread counts and DTS and CPM for the core. If the core is
+/// only asleep (not winkled) then we attempt to read the DTS and CPM for the
+/// L3. Note that TOD timestamps are always collected, even if the data is
+/// simply zeroed.
+///
+/// A modified copy of the OHA_RO_STATUS_REG read during the PC-only SPWU
+/// protocol is stored with the data. Several low-order reserved bits of the
+/// register image are programmed with the following masks. See the
+/// documentation for these bits for full details.
+///
+/// - CORE_DATA_CPM_HIST_RESET_ACCESS_FAILED
+/// - CORE_DATA_OHA_RO_STATUS_ACCESS_FAILED
+/// - CORE_DATA_EMPATH_COLLECTED
+/// - CORE_DATA_CORE_SENSORS_COLLECTED
+/// - CORE_DATA_L3_SENSORS_COLLECTED
+/// - CORE_DATA_EXPECTED_EMPATH_ERROR
+/// - CORE_DATA_UNEXPECTED_EMPATH_ERROR
+///
+/// In the event of expected or unexpected errors during EMPATH data
+/// collection the 3-bit PCB error code will also be stored at bit
+/// CORE_DATA_EMPATH_ERROR_LOCATION.
+///
+/// This is the PC-ONLY Special Wakeup + processing Sequence
+///
+/// 1. Switch to manual error handling mode and disable PIB errors.
+///
+/// 2. Write OHA_CPM_HIST_RESET_REG.pconly_special_wakeup = 1. If the write
+/// fails, note the failure and go to the bypass routine.
+///
+/// 3. Read OHA_RO_STATUS_REG. If the SCOM fails, access is impossible and
+/// noted. If the special wakeup complete is not immediately set that error is
+/// also noted. If either test fails then go to the bypass routine. Otherwise
+/// note success and continue.
+///
+/// 4. Attempt to collect sensor (DTS/CPM) data for the core and L3. This must
+/// be done with manual error handling as these SCOMs are not protected by
+/// PC-only SPWU.
+///
+/// 5. Switch to a private error handling table setup that allows the
+/// procedure to catch PCB data errors during EMPATH processing. This is
+/// required as a workaround for HW280375.
+///
+/// 6. Collect EMPATH data.
+///
+/// 7. Restore error handling; Clear the PC-only SPWU bit.
+///
+/// 8. Collect PCB Slave data.
+///
+/// When the core is inaccessible a similar "bypass" sequence to the data
+/// collection sequence is run, however all data other than timestamps and the
+/// PCB Slave data are stored as 0, and the PC-Only SPWU bit is cleared before
+/// error handling is re-enabled. The bypass routine will also take care of
+/// attempting to collect L3 DTS/CPM data for sleeping cores.
+///
+/// Note that the PCB slave data must be collected after the removal of
+/// PC-only special wakeup, otherwise a napping core will always appear to be
+/// in the run state.
+///
+/// Several global variables are required. Thus this procedure and its callers
+/// are not reentrant.
+#ifdef DOXYGEN_ONLY
+ void get_per_core_raw_data();
+#endif
+/// \cond
+
+get_per_core_raw_data:
+
+ // At entry:
+ //
+ // P0 : The chiplet to access (invariant)
+ // P1 : The chiplet id of the TOD, as set up by the callers (invariant)
+ // A0 : Pointer to the data area for the core (invariant)
+ // SPRG0 : Reserved to the caller (invariant)
+ // CTR : Reserved to the caller (invariant)
+ //
+ // core_data_parms: Holds the pointer to the parameters
+ //
+ // At exit:
+ //
+ // All other registers are scratched by this routine
+
+ // (1) Switch to manual error handling mode and disable PIB errors.
+ // The incoming EMR is kept in saved_emr so it can be restored later.
+
+ mr D0, EMR
+ la A1, saved_emr
+ std D0, 0, A1
+
+ andi D0, D0, ~(PORE_ERROR_MASK_ENABLE_ERR_HANDLER0 | \
+ PORE_ERROR_MASK_ENABLE_ERR_OUTPUT0 | \
+ PORE_ERROR_MASK_ENABLE_FATAL_ERR_OUTPUT0 | \
+ PORE_ERROR_MASK_STOP_EXE_ON_ERROR0)
+ mr EMR, D0
+
+ // NOTE(review): manual_emr is written here but is not read anywhere
+ // in the portion of the file reviewed - confirm it has a consumer.
+
+ la A1, manual_emr
+ std D0, 0, A1
+
+
+ // (2) Write OHA_CPM_HIST_RESET_REG.pconly_special_wakeup = 1. If the
+ // write fails, note the failure and go to the bypass routine.
+ //
+ // The branch to 3: is the success path. On failure the status image
+ // in the data area is overwritten with just the failure flag.
+
+ sti OHA_CPM_HIST_RESET_REG, P0, \
+ OHA_CPM_HIST_RESET_REG_PCONLY_SPECIAL_WAKEUP
+ tprcbz D0, 3f
+
+ sti CORE_DATA_OHA_RO_STATUS_REG, A0, \
+ CORE_DATA_CPM_HIST_RESET_ACCESS_FAILED
+ bra bypass_core_data
+
+
+ // 3. Read OHA_RO_STATUS_REG. If the SCOM fails, access is impossible
+ // and noted. If the special wakeup complete is not immediately set
+ // that error is also noted. If either test fails then go to the
+ // bypass routine. Otherwise note success and continue.
+
+3:
+ ld D0, OHA_RO_STATUS_REG, P0
+ tprcbz D1, 31f
+
+ sti CORE_DATA_OHA_RO_STATUS_REG, A0, \
+ CORE_DATA_OHA_RO_STATUS_ACCESS_FAILED
+ bra bypass_core_data
+
+31:
+ // The raw register image is stored first; status flags are OR-ed
+ // into this image later as collection proceeds.
+
+ std D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+
+ // If either access is impossible we go to bypass. The bypass code
+ // will read the L3 DTS/CPM data if it is possible.
+
+ andi D1, D0, (OHA_RO_STATUS_REG_CORE_ACCESS_IMPOSSIBLE | \
+ OHA_RO_STATUS_REG_ECO_ACCESS_IMPOSSIBLE)
+ branz D1, bypass_core_data
+
+ andi D1, D0, OHA_RO_STATUS_REG_SPECIAL_WAKEUP_COMPLETED
+ braz D1, bypass_core_data
+
+
+ // 4. Attempt to collect sensor (DTS/CPM) data. This must be done with
+ // manual error handling (in effect here) as these SCOMs are not
+ // protected by a PC-only SPWU.
+
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ bsr getSensors
+
+
+ // 5. Switch to a private error handling table setup that allows the
+ // procedure to catch PCB errors during EMPATH processing.
+
+ // NB: We know that this is being run as a PoreFlex job from OCC FW on
+ // either GPE0 or GPE1. We also know that the default error mask does
+ // not handle any errors with a table.
+
+ tebngpe0 D0, 1f
+ la A1, PORE_GPE0_TABLE_BASE_ADDR
+ bra 2f
+1:
+ la A1, PORE_GPE1_TABLE_BASE_ADDR
+2:
+ la D0, empathErrorHandlers
+ std D0, 0, A1
+
+ // The EMPATH error mask is derived from the saved (original) EMR:
+ // handler 0 is enabled, error outputs and stop-on-error are disabled.
+
+ la A1, saved_emr
+ ld D0, 0, A1
+ ori D0, D0, PORE_ERROR_MASK_ENABLE_ERR_HANDLER0
+ andi D0, D0, ~(PORE_ERROR_MASK_ENABLE_ERR_OUTPUT0 | \
+ PORE_ERROR_MASK_ENABLE_FATAL_ERR_OUTPUT0 | \
+ PORE_ERROR_MASK_STOP_EXE_ON_ERROR0)
+ mr EMR, D0
+
+#if INJECT_HW280375_ERRORS
+
+ // This code is used to test the workaround for HW280375. The
+ // undiagnosed hardware bug causes PCB error 4 to occur intermittently
+ // when accessing EMPATH registers. The appearance of the defect is
+ // actually quite rare in practice, therefore this code remains in
+ // case future development and testing of this procedure is necessary.
+
+ // The test generates PCB error 4 by reading a non-existent OHA
+ // register of the current core, once every 1024 samples on
+ // average. The LFSR modifies A0 so we need to shuffle A0 <->
+ // A1. (Note the LFSR code is not delivered to OCC FW).
+
+ mr A1, A0
+
+ la A0, testHw280375Lfsr
+ ld D0, 0, A0
+ bsr pore_rand64
+ la A0, testHw280375Lfsr
+ std D0, 0, A0
+
+ mr A0, A1
+
+ andi D0, D0, 0x3ff
+ branz D0, 1f
+ ld D0, 0x200ff, P0 # Force PCB error 4
+1:
+
+#endif
+
+ // 6. Collect EMPATH data
+
+ // Test/collect each data group in order. First reload the parameter
+ // pointer into A1.
+
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ // Each group below follows the same pattern: test the group select
+ // bit and skip to the next local label 1: if the group is not
+ // selected; otherwise tag the group, point PC_OCC_SPRC at the first
+ // counter with autoincrement, and read the counters in pairs.
+
+ // EMPATH
+empath:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_EMPATH
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_EMPATH_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ sti PC_OCC_SPRC, P0, \
+ (SPRN_CORE_INSTRUCTION_DISPATCH | SPRN_PC_AUTOINCREMENT)
+
+ get_pc_pair (_BASE + 0x08), A0, P0
+ get_pc_pair (_BASE + 0x10), A0, P0
+ get_pc_pair (_BASE + 0x18), A0, P0
+ get_pc_pair (_BASE + 0x20), A0, P0
+
+ // Per-Core (partition) Memory Counters
+per_core_memory:
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_MEMORY
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_MEMORY_BASE
+ tag_core_data_group _BASE, A0, P0, P1
+
+ sti PC_OCC_SPRC, P0, \
+ (SPRN_CORE_MEM_C_LPAR(0) | SPRN_PC_AUTOINCREMENT)
+
+ get_pc_pair (_BASE + 0x08), A0, P0
+ get_pc_pair (_BASE + 0x10), A0, P0
+
+ // Throttling Counters
+throttling:
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_THROTTLE
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_THROTTLE_BASE
+ tag_core_data_group _BASE, A0, P0, P1
+
+ sti PC_OCC_SPRC, P0, \
+ (SPRN_IFU_THROTTLE_COUNTER | SPRN_PC_AUTOINCREMENT)
+
+ get_pc_pair (_BASE + 0x08), A0, P0
+ get_pc_pair (_BASE + 0x10), A0, P0
+
+ // Per-Thread Counters
+ //
+ // Only the thread 0 tag is written here; the tag slots of threads
+ // 1-7 are skipped and filled in by the interpolation below.
+per_thread:
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_THREAD
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_THREAD_BASE(0)
+ tag_core_data_group _BASE, A0, P0, P1
+
+ sti PC_OCC_SPRC, P0, \
+ (SPRN_THREAD_RUN_CYCLES(0) | SPRN_PC_AUTOINCREMENT)
+
+ get_pc_pair (_BASE + 0x08), A0, P0 # Run/Completion T0
+ get_pc_pair (_BASE + 0x10), A0, P0 # Mem A/B T0
+ // (_BASE + 0x18), A0, P0 # Tag T1
+ get_pc_pair (_BASE + 0x20), A0, P0 # Run/Completion T1
+ get_pc_pair (_BASE + 0x28), A0, P0 # Mem A/B T1
+ // (_BASE + 0x30), A0, P0 # Tag T2
+ get_pc_pair (_BASE + 0x38), A0, P0 # Run/Completion T2
+ get_pc_pair (_BASE + 0x40), A0, P0 # Mem A/B T2
+ // (_BASE + 0x48), A0, P0 # Tag T3
+ get_pc_pair (_BASE + 0x50), A0, P0 # Run/Completion T3
+ get_pc_pair (_BASE + 0x58), A0, P0 # Mem A/B T3
+ // (_BASE + 0x60), A0, P0 # Tag T4
+ get_pc_pair (_BASE + 0x68), A0, P0 # Run/Completion T4
+ get_pc_pair (_BASE + 0x70), A0, P0 # Mem A/B T4
+ // (_BASE + 0x78), A0, P0 # Tag T5
+ get_pc_pair (_BASE + 0x80), A0, P0 # Run/Completion T5
+ get_pc_pair (_BASE + 0x88), A0, P0 # Mem A/B T5
+ // (_BASE + 0x90), A0, P0 # Tag T6
+ get_pc_pair (_BASE + 0x98), A0, P0 # Run/Completion T6
+ get_pc_pair (_BASE + 0xa0), A0, P0 # Mem A/B T6
+ // (_BASE + 0xa8), A0, P0 # Tag T7
+ get_pc_pair (_BASE + 0xb0), A0, P0 # Run/Completion T7
+ get_pc_pair (_BASE + 0xb8), A0, P0 # Mem A/B T7
+
+
+ // Interpolation of TOD and Raw Cycles over 8 threads. First collect
+ // a new tag, then compute the difference with the thread0 tag. The
+ // differences are then divided by 8 to form the interpolation
+ // increment, and interpolation takes place in an unrolled loop.
+ //
+ // Note that we're doing parallel arithmetic here, and ignoring the
+ // fact that there may be a carry/borrow from the low-order TOD into
+ // the high-order cycle count. A single LSB is noise for the cycle
+ // count, but would be significant for the TOD, which is why the
+ // TOD is placed in the low-order part of the doubleword. Given that
+ // a single LSB is noise for the cycle count there is no reason to
+ // expend the time/code space to do the arithmetic 'correctly'.
+ //
+ // The mask clears the 3 low-order bits of each 32-bit half so that
+ // the following 3-bit rotate acts as an independent shift right by 3
+ // on both halves.
+
+interpolate:
+ tag_core_data_group 0, 0, P0, P1, store=0 # D0 contains the _NOW_ tag
+
+ ld D1, CORE_DATA_THREAD_BASE(0), A0 # D1 will be used for interp.
+ sub D0, D0, D1
+ andi D0, D0, 0xfffffff8fffffff8 # Mask off bad bits and div. by 8.
+ rotrdi D0, D0, 3
+
+ // interpolate <thread>: advance the running tag in D1 by the
+ // increment in D0 and store it as the tag of the given thread. This
+ // macro is also used by bypass_core_data further down the file.
+
+ .macro interpolate, thread
+ add D1, D0, D1
+ std D1, CORE_DATA_THREAD_BASE(\thread), A0
+ .endm
+
+ interpolate 1
+ interpolate 2
+ interpolate 3
+ interpolate 4
+ interpolate 5
+ interpolate 6
+ interpolate 7
+
+
+ // If we made it here there were no errors - Yippee! If we were asked
+ // to collect any EMPATH data then acknowledge that we did.
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, \
+ (GPE_GET_CORE_DATA_EMPATH | \
+ GPE_GET_CORE_DATA_MEMORY | \
+ GPE_GET_CORE_DATA_THROTTLE | \
+ GPE_GET_CORE_DATA_THREAD)
+ braz D0, 1f
+
+ ld D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+ ori D0, D0, CORE_DATA_EMPATH_COLLECTED
+ std D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+
+
+ // 7. Restore error handling; Clear the PC-Only SPWU bit
+1:
+ la A1, saved_emr
+ ld D0, 0, A1
+ mr EMR, D0
+
+ sti OHA_CPM_HIST_RESET_REG, P0, 0
+
+ // A1 was used for saved_emr; reload the parameter pointer.
+
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+
+ // 8. Collect PCB-Slave data. This happens after the PC-only SPWU bit
+ // has been cleared above.
+pcb_slave:
+
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_PCB_SLAVE
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_PCB_SLAVE_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ get_scom D0, PCBS_POWER_MANAGEMENT_CONTROL_REG, P0, CORE_DATA_PMCR, A0
+ get_scom D0, PCBS_POWER_MANAGEMENT_STATUS_REG, P0, CORE_DATA_PMSR, A0
+ get_scom D0, PCBS_PMSTATEHISTOCC_REG, P0, CORE_DATA_PM_HISTORY, A0
+
+1:
+ ret
+
+
+ //////////////////////////////////////////////////////////////////////
+ // getSensors
+ //////////////////////////////////////////////////////////////////////
+ //
+ // Try to get core and L3 sensor (DTS/CPM) data
+ //
+ // At Entry:
+ //
+ // We are in manual PIB error handling mode
+ // A0 : Base address of core data area
+ // A1 : Address of the parameter block
+ // P0 : Chiplet
+ // P1 : Chiplet id of the TOD (used for the group tag)
+ //
+ // At exit:
+ //
+ // A0, P0 unchanged
+ // A1 : Address of the parameter block (reloaded from
+ // core_data_parms on the failure paths, which use A1 as scratch)
+ // D0, D1 scratched
+ //
+ // Nothing at all is written if the DTS/CPM group is not selected.
+ //
+ // Note that due to HW279433, we can not read the CPM sensors without
+ // the possibility of a FIR bit being set due to a PCB timeout. Since
+ // the CPMs are currently not in plan for P8, these fields of the data
+ // structure are simply zeroed.
+
+getSensors:
+
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_DTS_CPM
+ braz D0, getSensorsDone
+
+ // HW279433, see above
+ ls D0, 0
+ std D0, CORE_DATA_SENSOR_V8, A0
+ std D0, CORE_DATA_SENSOR_V9, A0
+
+ .set _BASE, CORE_DATA_DTS_CPM_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ // First try the core. On success the value is stored and the
+ // "collected" flag is OR-ed into the saved status image.
+
+ ld D0, SENSORS_CORE_V0, P0
+ tprcbnz D1, coreSensorsFailed
+ std D0, CORE_DATA_SENSOR_V0, A0
+
+ ld D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+ ori D0, D0, CORE_DATA_CORE_SENSORS_COLLECTED
+ std D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+
+ bra tryL3
+
+coreSensorsFailed:
+
+ // Record D1 (presumably the PIB return code left there by tprcbnz -
+ // TODO confirm) in a global, restore A1, and zero the sensor value.
+
+ la A1, G_ggcd_coreSensorFail
+ std D1, 0, A1
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ ls D0, 0
+ std D0, CORE_DATA_SENSOR_V0, A0
+
+ // Now try the L3
+tryL3:
+ ld D0, SENSORS_CORE_V1, P0
+ tprcbnz D1, l3SensorsFailed
+ std D0, CORE_DATA_SENSOR_V1, A0
+
+ ld D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+ ori D0, D0, CORE_DATA_L3_SENSORS_COLLECTED
+ std D0, CORE_DATA_OHA_RO_STATUS_REG, A0
+
+ bra getSensorsDone
+
+l3SensorsFailed:
+
+ // Same failure handling as for the core sensors, using the L3 global.
+
+ la A1, G_ggcd_l3SensorFail
+ std D1, 0, A1
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ ls D0, 0
+ std D0, CORE_DATA_SENSOR_V1, A0
+
+getSensorsDone:
+ ret
+
+
+ //////////////////////////////////////////////////////////////////////
+ // gpcrdError0
+ //
+ // Trap error 0 during EMPATH processing, and set a bit indicating if
+ // this is an "expected" or "unexpected" error. The only expected
+ // error is a PCB error #4 due to HW280375.
+ //
+ // Note that PORE treats error branches as subroutine calls. We need
+ // to pop the HW stack before continuing. We assume we are running on
+ // either GPE0 or GPE1.
+ //
+ // At entry A0 and P0 are still those of get_per_core_raw_data(); A1,
+ // D0 and D1 are scratched. The handler never returns to the faulting
+ // instruction: it finishes by branching to bypass_core_data.
+ ////////////////////////////////////////////////////////////////////////////
+
+ // empathErrorHandlers is the table whose address is installed at the
+ // engine's table base address in step 5 of get_per_core_raw_data().
+ // Only the first entry (error 0) is provided.
+
+ .global empathErrorHandlers
+empathErrorHandlers:
+ bra gpcrdError0
+
+gpcrdError0:
+
+ // Set A1 for current engine
+
+ tebngpe0 D0, 1f
+ la A1, PORE_GPE0_OCI_BASE
+ bra 2f
+1:
+ la A1, PORE_GPE1_OCI_BASE
+2:
+
+ // Extract PCB parity error + 3-bit code and compare. Apparently the
+ // PCB error code is not set in the IFR when we take the error branch,
+ // so we have to get it from the debug register. The error code is
+ // used to decide if the error is "expected" or "unexpected".
+ //
+ // D0 <- 4-bit field (parity + code); D1 <- saved status image.
+
+ ld D0, PORE_DBG0_OFFSET, A1
+ extrdi D0, D0, 4, 32
+
+ ld D1, CORE_DATA_OHA_RO_STATUS_REG, A0
+ cmpibraeq D0, 1f, 4
+
+ // This error is "unexpected"
+
+ ori D1, D1, CORE_DATA_UNEXPECTED_EMPATH_ERROR
+ bra 2f
+
+ // This error (#4) is "expected"
+1:
+ ori D1, D1, CORE_DATA_EXPECTED_EMPATH_ERROR
+
+ // Insert the error code into the OHA_RO_STATUS image
+2:
+ insrdi D1, D0, \
+ CORE_DATA_EMPATH_ERROR_BITS, CORE_DATA_EMPATH_ERROR_LOCATION
+ std D1, CORE_DATA_OHA_RO_STATUS_REG, A0
+
+
+ // Pop the hardware stack. The easiest way to do this is to modify the
+ // current stack pointer and "return" to a local label.
+ //
+ // The address of the local label is shifted into position and
+ // written over the PC_STACK0 register, so the ret lands on 1: below.
+
+ la D0, 1f
+ sldi D0, D0, 16
+ std D0, PORE_PC_STACK0_OFFSET, A1
+ ret
+1:
+
+ // Clear the debug registers.
+
+ ls D0, 0
+ std D0, PORE_DBG0_OFFSET, A1
+ std D0, PORE_DBG1_OFFSET, A1
+
+ // Bypass EMPATH data (that routine will restore the default error
+ // handling and re-establish A1)
+
+ bra bypass_core_data
+
+
+ //////////////////////////////////////////////////////////////////////
+ // bypass_core_data
+ //////////////////////////////////////////////////////////////////////
+ //
+ // This entry point is used when the core is inaccessible due to idle
+ // modes or other conditions. At entry we are in manual SCOM error
+ // handling mode. The routine will first attempt to collect the
+ // core and L3 DTS/CPM for Sleeping cores, then restore error
+ // handling and zero out the EMPATH data before collecting PCBS data.
+ //
+ // At entry:
+ //
+ // P0 : The chiplet to access
+ // P1 : The chiplet id of the TOD
+ // A0 : Pointer to the data area for the core
+ //
+ // A1, D0 and D1 are scratched. This code is reached by branch (not
+ // bsr), so the final ret returns to the caller of
+ // get_per_core_raw_data().
+ //
+ // NOTE(review): when entered from gpcrdError0 the EMR still holds the
+ // EMPATH setting (error handler 0 enabled) rather than the manual
+ // mode described above - confirm that getSensors cannot fault
+ // repeatedly in that case.
+
+ // HW243646: We never read EMPATH counters here. The
+ // counters are all zeroed and all calls of tag_core_data_group
+ // specify raw=0.
+
+bypass_core_data:
+
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ bsr getSensors
+
+ // Clear the PC-Only SPWU bit and restore SCOM error handling. Then
+ // reload the parameter pointer into A1.
+
+ sti OHA_CPM_HIST_RESET_REG, P0, 0
+
+ la A1, saved_emr
+ ld D0, 0, A1
+ mr EMR, D0
+
+ la A1, core_data_parms
+ ld D0, 0, A1
+ mr A1, D0
+
+ // Bypass core data. Each selected group is tagged with the TOD only
+ // and its counters are stored as zero; an unselected group is skipped
+ // by branching to the next local label 1:.
+
+ // EMPATH
+
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_EMPATH
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_EMPATH_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ ls D0, 0
+ std D0, (_BASE + 0x08), A0
+ std D0, (_BASE + 0x10), A0
+ std D0, (_BASE + 0x18), A0
+ std D0, (_BASE + 0x20), A0
+
+
+ // Per-Core Memory Counters
+
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_MEMORY
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_MEMORY_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ ls D0, 0
+ std D0, (_BASE + 0x08), A0
+ std D0, (_BASE + 0x10), A0
+
+
+ // Throttling Counters
+
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_THROTTLE
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_THROTTLE_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ ls D0, 0
+ std D0, (_BASE + 0x08), A0
+ std D0, (_BASE + 0x10), A0
+
+
+ // Per-Thread Counters
+
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_THREAD
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_THREAD_BASE(0)
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ ls D0, 0
+ std D0, (_BASE + 0x08), A0 # Run/Completion T0
+ std D0, (_BASE + 0x10), A0 # Mem A/B T0
+ // (_BASE + 0x18), A0 # Tag T1
+ std D0, (_BASE + 0x20), A0 # Run/Completion T1
+ std D0, (_BASE + 0x28), A0 # Mem A/B T1
+ // (_BASE + 0x30), A0 # Tag T2
+ std D0, (_BASE + 0x38), A0 # Run/Completion T2
+ std D0, (_BASE + 0x40), A0 # Mem A/B T2
+ // (_BASE + 0x48), A0 # Tag T3
+ std D0, (_BASE + 0x50), A0 # Run/Completion T3
+ std D0, (_BASE + 0x58), A0 # Mem A/B T3
+ // (_BASE + 0x60), A0 # Tag T4
+ std D0, (_BASE + 0x68), A0 # Run/Completion T4
+ std D0, (_BASE + 0x70), A0 # Mem A/B T4
+ // (_BASE + 0x78), A0 # Tag T5
+ std D0, (_BASE + 0x80), A0 # Run/Completion T5
+ std D0, (_BASE + 0x88), A0 # Mem A/B T5
+ // (_BASE + 0x90), A0 # Tag T6
+ std D0, (_BASE + 0x98), A0 # Run/Completion T6
+ std D0, (_BASE + 0xa0), A0 # Mem A/B T6
+ // (_BASE + 0xa8), A0 # Tag T7
+ std D0, (_BASE + 0xb0), A0 # Run/Completion T7
+ std D0, (_BASE + 0xb8), A0 # Mem A/B T7
+
+
+ // Interpolation of TOD and Raw Cycles over 8 threads. First collect
+ // a new tag, then compute the difference with the thread0 tag. The
+ // differences are then divided by 8 to form the interpolation
+ // increment, and interpolation takes place in an unrolled loop.
+ //
+ // Note that we're doing parallel arithmetic here, and ignoring the
+ // fact that there may be a carry/borrow from the low-order TOD into
+ // the high-order cycle count. A single LSB is noise for the cycle
+ // count, but would be significant for the TOD, which is why the
+ // TOD is placed in the low-order part of the doubleword. Given that
+ // a single LSB is noise for the cycle count there is no reason to
+ // expend the time/code space to do the arithmetic 'correctly'.
+ //
+ // With raw=0 the high-order (cycle count) half of every tag is zero,
+ // so only the TOD is actually interpolated on this path.
+
+ tag_core_data_group 0, 0, P0, P1, raw=0, store=0 # D0 contains _NOW_ tag
+
+ ld D1, CORE_DATA_THREAD_BASE(0), A0 # D1 will be used for interp.
+ sub D0, D0, D1
+ andi D0, D0, 0xfffffff8fffffff8 # Mask off bad bits and div. by 8.
+ rotrdi D0, D0, 3
+
+ interpolate 1
+ interpolate 2
+ interpolate 3
+ interpolate 4
+ interpolate 5
+ interpolate 6
+ interpolate 7
+
+
+ // Per-Core PCB Slave Registers
+ //
+ // The local label 1: below is the skip target for the Per-Thread
+ // group test above. Without it, an unselected thread group would
+ // branch straight to the final ret and the PCB Slave data would
+ // never be collected on the bypass path.
+get_pcbs_data:
+1:
+ ldandi D0, GPEGETCOREDATAPARMS_SELECT, A1, GPE_GET_CORE_DATA_PCB_SLAVE
+ braz D0, 1f
+
+ .set _BASE, CORE_DATA_PCB_SLAVE_BASE
+ tag_core_data_group _BASE, A0, P0, P1, raw=0
+
+ get_scom D0, PCBS_POWER_MANAGEMENT_CONTROL_REG, P0, CORE_DATA_PMCR, A0
+ get_scom D0, PCBS_POWER_MANAGEMENT_STATUS_REG, P0, CORE_DATA_PMSR, A0
+ get_scom D0, PCBS_PMSTATEHISTOCC_REG, P0, CORE_DATA_PM_HISTORY, A0
+
+1:
+ ret
+
+/// \endcond
+
+
+////////////////////////////////////////////////////////////////////////////
+// gpe_get_core_data_fast()
+////////////////////////////////////////////////////////////////////////////
+
+/// \fn gpe_get_core_data_fast(GpeGetChipDataFastParms *parms);
+/// \brief Get chip raw data on fastest possible timescale
+///
+/// This routine collects raw data for the entire chip on the fastest possible
+/// timescale. Where chiplet data is collected, the configured chiplets are
+/// specified in the configuration mask parameter. Data is grouped
+/// into logical groups, and the collection of any group is enabled by a group
+/// select mask. All data groups are tagged with the TOD.
+#ifdef DOXYGEN_ONLY
+void gpe_get_core_data_fast(GpeGetChipDataFastParms *parms);
+#endif
+/// \cond
+
+ // Register usage:
+ //
+ // A1 : Holds the (constant) pointer to the paramaters
+ // A0 : Holds the (varying) pointer to the data area for the current
+ // data group or datum.
+ // P1 : Holds the (constant) chiplet id of the TOD
+ // P0 : Holds the (varying) chiplet id of interest
+ // CTR : Loops through chiplet indices
+ // D1 : Holds/rotates configuration mask
+ // D0 : Scratch
+
+ .global gpe_get_core_data_fast
+
+gpe_get_core_data_fast:
+
+ // Set up registers: A1 = parms (from ETR), D1 = core configuration mask
+ // after left_justify_core_config, P1 = TOD chiplet id, A0 = target OCI
+ // data pointer. Only a single getscom/putOCI is done per core, so the
+ // mask can stay in D1. The data group is tagged with the TOD first.
+
+ mr A1, ETR
+ ld D0, GPEGETCOREDATAFASTPARMS_CONFIG, A1
+ left_justify_core_config D0
+ mr D1, D0
+ lpcs P1, TOD_VALUE_REG
+ ld D0, GPEGETCOREDATAFASTPARMS_DATA, A1
+ mr A0, D0
+
+ tag_data_group CORE_DATA_FAST_FREQ_TARGET_BASE, D0, A0, P1
+ adds A0, A0, 8 # Advance the data pointer by one doubleword
+
+ ls P0, 0x10 # Chiplet id of the first core considered
+ ls CTR, (PGP_NCORES - 1) # PORE does test, then decr. and branch
+
+freq_target_loop:
+
+ // Test the chiplet mask: the MSB of D1 stands for the current chiplet
+ // and D1 is rotated left on every pass. Unconfigured chiplets are skipped.
+
+ andi D0, D1, 0x8000000000000000
+ rotldi D1, D1, 1
+ braz D0, freq_target_continue
+
+ get_scom D0, PCBS_LOCAL_PSTATE_FREQUENCY_TARGET_STATUS_REG, P0, \
+ 0x00, A0
+
+freq_target_continue:
+
+ // Increment the core chiplet index and data pointer (also for skipped
+ // chiplets, so every chiplet keeps its own 8-byte slot), then loop.
+
+ adds P0, P0, 1
+ adds A0, A0, 8
+ loop freq_target_loop
+
+1:
+ halt # All core chiplets visited - procedure complete
+
+/// \endcond
+
+
+////////////////////////////////////////////////////////////////////////////
+// gpe_get_chip_data()
+////////////////////////////////////////////////////////////////////////////
+
+/// \fn gpe_get_chip_data(GpeGetChipDataParms *parms);
+/// \brief Get chip-level raw data
+///
+/// This routine collects chip-level raw data. Data is grouped into logical
+/// groups, and the collection of any group is enabled by a group select
+/// mask. All data groups are tagged with the TOD.
+#ifdef DOXYGEN_ONLY
+void gpe_get_chip_data(GpeGetChipDataParms *parms);
+#endif
+/// \cond
+
+ // Register usage:
+ //
+ // A0 : Holds the (varying) pointer to the data area for the current
+ // data group or datum.
+ // P1 : Holds the (constant) chiplet id of the TOD
+ // D1 : Holds the (constant) select mask
+
+ .global gpe_get_chip_data
+
+gpe_get_chip_data:
+
+ // Register setup from the parameter block addressed by ETR:
+ // D1 = group select mask, P1 = TOD chiplet id, A0 = data area pointer.
+
+ mr A1, ETR
+ ld D0, GPEGETCHIPDATAPARMS_SELECT, A1
+ mr D1, D0
+ lpcs P1, TOD_VALUE_REG
+ ld D0, GPEGETCHIPDATAPARMS_DATA, A1
+ mr A0, D0
+
+ // Overcommit group. Skipped entirely unless selected in the mask;
+ // otherwise the group is first tagged with the TOD.
+
+ andi D0, D1, GPE_GET_CHIP_DATA_OVERCOMMIT
+ braz D0, 1f
+ tag_data_group CHIP_DATA_OVERCOMMIT_BASE, D0, A0, P1
+
+ // Copy PBA_PBOCR(0)...PBA_PBOCR(5). The sources sit at consecutive
+ // 8-byte offsets from PBA_PBOCRN(0) (A1 is reused as the source base);
+ // each destination is one doubleword further into the data area than
+ // its source offset. The .irp expands to six ocicopy invocations.
+
+ la A1, PBA_PBOCRN(0)
+ .irp src, 0x00, 0x08, 0x10, 0x18, 0x20, 0x28
+ ocicopy D0, \src, A1, (\src+0x08), A0
+ .endr
+
+1:
+ halt
+
+ .epilogue gpe_get_chip_data
+
+/// \endcond
+
+
+////////////////////////////////////////////////////////////////////////////
+// gpe_get_mem_data()
+////////////////////////////////////////////////////////////////////////////
+
+/// \fn gpe_get_mem_data(GpeGetMemDataParms *parms);
+/// \brief Get memory (MCS/Centaur) data for a particular MCS/Centaur
+///
+/// This routine collects data for the MCS/Centaur named (by instance ID,
+/// (0...PGP_NCENTAUR -1)) in the \a collect field of the \a parms parameter,
+/// unless \a collect is -1 in which case the data collection is bypassed.
+/// Once data has been collected, if the \a update field of the \a parms is
+/// not -1 then that numbered Centaur will be "poked" to start the sensor
+/// cache update. Once data collection (if any) and "poking" (if any) are
+/// finished the parameter block is timestamped with the TOD (at the standard
+/// 2MHz). This means that the TOD timestamp marks the "poke" time (when data
+/// collection starts), not the data collection time.
+///
+/// This procedure requires that the global G_centaurConfiguration structure
+/// must be present and have been properly initialized by
+/// centaur_configuration_create(). The procedure returns a return code -
+/// Either 0 for success, or a non zero value for failure. The failure codes
+/// are documented here: \ref gpe_get_mem_data_rc. Since the parameter block
+/// is read and written by GPE code it is strongly recommended to allocate
+/// instances of this structure in non-cacheable data sections, with the
+/// caveat that data structures assigned to non-default data sections must
+/// always be initialized. For example:
+///
+/// \code
+///
+/// static GpeGetMemDataParms S_parms SECTION_ATTRIBUTE(".noncacheable") = {0};
+///
+/// \endcode
+///
+/// NB: SW273814 documents a request to be able to differentiate which of the 2
+/// Centaurs is responsible for a hard failure. That's why we take pains to
+/// set up the RC prior to collection/poking to enable recovery code to make
+/// this determination.
+#ifdef DOXYGEN_ONLY
+void gpe_get_mem_data(GpeGetMemDataParms *parms);
+#endif
+/// \cond
+
+ .global gpe_get_mem_data
+gpe_get_mem_data:
+
+ // At entry:
+ //
+ // ETR : parms
+ //
+ // Invariants:
+ //
+ // ETR : parms
+ // A1 : parms (except when scratched by subroutines, always restored)
+
+ // Begin by marking the procedure as having died; ggmdExit stores the final RC
+
+ mr A1, ETR
+ sti GPEGETMEMDATAPARMS_RC, A1, GPE_GET_MEM_DATA_DIED
+
+ // Next check to make sure the G_centaurConfiguration is properly
+ // initialized (.configRc == 0). If not, exit with NOT_CONFIGURED.
+ //
+ // A1 : parms
+
+ la A0, G_centaurConfiguration
+ ld D0, CENTAUR_CONFIGURATION_CONFIG_RC, A0
+ braz D0, 1f # configRc is 0: configuration is usable
+
+ ls D0, GPE_GET_MEM_DATA_NOT_CONFIGURED
+ bra ggmdExit
+
+1:
+ // Set up the PBA for Centaur sensor cache access
+ //
+ // A1 : parms
+ // A0 : &G_centaurConfiguration ==> &G_centaurConfiguration.dataParms;
+
+ adds A0, A0, CENTAUR_CONFIGURATION_DATA_PARMS
+ bsr gpe_pba_reset
+ bsr gpe_pba_setup
+ mr A1, ETR # Re-establish invariant
+
+
+ // See if we're collecting data this pass (collect != -1). If so validate
+ // the MCS/Centaur index and program the PBA by calling ggmdDataSetup.
+ //
+ // A1 : parms
+
+ ld D0, GPEGETMEMDATAPARMS_COLLECT, A1
+ cmpibraeq D0, ggmdUpdate, -1 # collect is -1: bypass data collection
+
+ bsr ggmdDataSetup
+ mr A1, ETR # Re-establish invariant
+ braz D0, 1f # D0 is 0: setup succeeded
+
+ ls D0, GPE_GET_MEM_DATA_COLLECT_INVALID
+ bra ggmdExit
+
+1:
+ // A0 has the base address of the sensor cache as a PowerBus mapping.
+ // Load A1 with the user data pointer and copy 16 doublewords (128 bytes).
+ //
+ // A1 : parms ==> &MemData
+
+ sti GPEGETMEMDATAPARMS_RC, A1, GPE_GET_MEM_DATA_SENSOR_CACHE_FAILED
+
+ ld D0, GPEGETMEMDATAPARMS_DATA, A1
+ mr A1, D0
+
+ ocicopy D0, 0x00, A0, 0x00, A1
+ ocicopy D0, 0x08, A0, 0x08, A1
+ ocicopy D0, 0x10, A0, 0x10, A1
+ ocicopy D0, 0x18, A0, 0x18, A1
+ ocicopy D0, 0x20, A0, 0x20, A1
+ ocicopy D0, 0x28, A0, 0x28, A1
+ ocicopy D0, 0x30, A0, 0x30, A1
+ ocicopy D0, 0x38, A0, 0x38, A1
+ ocicopy D0, 0x40, A0, 0x40, A1
+ ocicopy D0, 0x48, A0, 0x48, A1
+ ocicopy D0, 0x50, A0, 0x50, A1
+ ocicopy D0, 0x58, A0, 0x58, A1
+ ocicopy D0, 0x60, A0, 0x60, A1
+ ocicopy D0, 0x68, A0, 0x68, A1
+ ocicopy D0, 0x70, A0, 0x70, A1
+ ocicopy D0, 0x78, A0, 0x78, A1
+
+ mr A1, ETR # Re-establish invariant
+
+ sti GPEGETMEMDATAPARMS_RC, A1, GPE_GET_MEM_DATA_DIED
+
+ // See if we're poking Centaur this pass (update != -1). If so validate
+ // the MCS/Centaur index and program the PBA by calling ggmdDataSetup.
+ //
+ // A1 : parms
+ggmdUpdate:
+
+ ld D0, GPEGETMEMDATAPARMS_UPDATE, A1
+ cmpibraeq D0, ggmdTimestamp, -1 # update is -1: no poke this pass
+
+ bsr ggmdDataSetup
+ mr A1, ETR # Re-establish invariant
+ braz D0, 1f # D0 is 0: setup succeeded
+
+ ls D0, GPE_GET_MEM_DATA_UPDATE_INVALID
+ bra ggmdExit
+
+1:
+ // Poke it: store a zero doubleword at the address left in A0 by ggmdDataSetup
+
+ sti GPEGETMEMDATAPARMS_RC, A1, GPE_GET_MEM_DATA_UPDATE_FAILED
+
+ ls D0, 0
+ std D0, 0, A0
+
+ sti GPEGETMEMDATAPARMS_RC, A1, GPE_GET_MEM_DATA_DIED
+
+ // Collect the timestamp and reduce the 64-bit 512MHz timestamp to a
+ // 32-bit 2MHz timestamp. Then we're out...
+ //
+ // A1 : parms
+ggmdTimestamp:
+
+ lpcs P0, TOD_VALUE_REG
+ ld D0, TOD_VALUE_REG, P0
+ extrdi D0, D0, 32, 24 # Keep 32 bits starting at TOD bit 24
+ std D0, GPEGETMEMDATAPARMS_PAD_TOD, A1
+
+
+ ////////////////////////////////////////////////////////////////////
+ // Not so fast... If this is Centaur DD1 then we did not actually
+ // collect the Centaur internal temperatures due to HW256773. So we
+ // will go collect them now "manually" by calling _gpe_scom_centaur
+ // with a hard-coded setup to collect SCOM 0x02050000. We then
+ // splice this result into the accumulated cache-line data.
+ //
+ // A1 : Parms
+ ////////////////////////////////////////////////////////////////////
+
+ // Nothing to do if we're not collecting data. Otherwise pull out the
+ // CFAM ID of the collected Centaur and compare for Centaur DD1
+
+ ld D0, GPEGETMEMDATAPARMS_COLLECT, A1
+ cmpibraeq D0, ggmdCleanExit, -1 # No collection this pass: done
+
+ sldi D0, D0, 3 # Multiply by 8 for a byte offset
+
+ la D1, G_centaurConfiguration
+ adds D1, D1, CENTAUR_CONFIGURATION_DEVICE_ID
+ add D0, D0, D1 # Address of the device id entry for this Centaur
+ mr A0, D0
+ ld D0, 0, A0
+ extrdi D0, D0, 32, 0 # Compare only the high-order 32 bits
+
+ cmpibrane D0, ggmdCleanExit, CFAM_CHIP_ID_CENTAUR_10 # Not DD1: done
+
+ // This is DD1. Set up the parameters and call _gpe_scom_centaur.
+ // Since we can only do 8-byte stores we read-modify-write the first
+ // entry of the scomList_t. Then call for the SCOM. If it failed set
+ // the failure code. All registers must be restored after the
+ // subroutine call.
+
+ la A0, G_ggmdHw256773
+ ld D0, SCOM_LIST_COMMAND, A0
+ ld D1, GPEGETMEMDATAPARMS_COLLECT, A1
+ scom_list_set_instance_number D0, D1 # Macro defined elsewhere
+ std D0, SCOM_LIST_COMMAND, A0
+
+ la A0, G_hw256773 # Hand-built GpeScomParms for the call
+ bsr _gpe_scom_centaur
+
+ la A0, G_hw256773 # Reload: the call may have scratched A0
+ mr A1, ETR # Re-establish invariant
+
+ ld D0, GPE_SCOM_PARMS_RC_ERROR_INDEX, A0
+ gpe_scom_parms_get_rc D0, D0
+ braz D0, 1f # rc is 0: the SCOM succeeded
+
+ ls D0, GPE_GET_MEM_DATA_HW256773_FAILED
+ bra ggmdExit
+
+1:
+ // The SCOM succeeded. The data needs to be moved from the
+ // gpe_scom_centaur data into the sensor-cache data area. Since there
+ // are only 32 bits we need to read-modify-write the SRAM. This is
+ // doubleword 12 (byte offset 0x60) of the sensor cache. The 32 bits we
+ // need are the high-order bits of the SCOM, copied into the low-order
+ // bits of that doubleword. Finally fall through to the clean exit.
+
+ la A0, G_ggmdHw256773
+ ld D0, SCOM_LIST_DATA, A0
+
+ ld D1, GPEGETMEMDATAPARMS_DATA, A1
+ mr A0, D1 # A0 = user data area
+ ld D1, 0x60, A0
+ rldimi D1, D0, 32, 32, 63 # High word of D0 replaces low word of D1
+ std D1, 0x60, A0
+
+
+ggmdCleanExit:
+ ls D0, 0 # Success return code
+ggmdExit:
+ std D0, GPEGETMEMDATAPARMS_RC, A1 # Final RC replaces the breadcrumb
+ halt
+
+
+ //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ // ggmdDataSetup
+ //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ //
+ // At entry:
+ //
+ // D0 : The Centaur instance number to set up
+ //
+ // At exit:
+ //
+ // A0 : On success, the OCI base address to use to access the
+ // sensor cache.
+ // D0 : 0 = Success; 1 = Failure - the caller will supply the
+ // correct error code back to the user.
+ //
+ // This routine checks the Centaur instance number for validity. If
+ // the instance number is valid then the PBA is programmed to access
+ // the sensor cache address. This requires reprogramming the PBA
+ // because part of the data address, which varies by Centaur, must be
+ // stored as the extended address field of the PBA slave control
+ // register. It is not necessary to reset the PBA slave for each data
+ // operation. Clobbers D1 and A1 (callers reload A1 from ETR).
+ggmdDataSetup:
+
+ // Check the Centaur instance number (D0) for validity.
+
+ ls D1, PGP_NCENTAUR
+ sub D1, D0, D1
+ tfbult D1, 1f # Instance is below PGP_NCENTAUR: in range
+
+ ls D0, 1
+ ret # Centaur instance too big
+
+1:
+ // Check to make sure the Centaur is configured by testing the base
+ // address for 0. The instance number is first multiplied by 8 to
+ // create an array offset.
+
+ sldi D0, D0, 3
+ la D1, G_centaurConfiguration
+ adds D1, D1, CENTAUR_CONFIGURATION_BASE_ADDRESS
+ add D0, D0, D1 # Address of the base address entry
+ mr A0, D0
+ ld D0, 0, A0
+ branz D0, 1f # Non-zero base address: Centaur is configured
+
+ ls D0, 1
+ ret # Base address is 0
+
+1:
+ // We have the Centaur base address in D0, and convert it to the full
+ // PowerBus address for the inband sensor cache access. Bit 27 is set
+ // to indicate OCC (vs. FSP) access. Bit 28 is set to indicate a
+ // sensor cache access.
+
+ ori D0, D0, 0x0000001800000000
+
+#if 1
+ la A0, G_ggmd_lastDataAddress # Debug
+ std D0, 0, A0
+#endif
+
+ // The OCI address is always 0, decorated with the PBA BAR number.
+
+ la A0, (PBA_BAR_CENTAUR << 28)
+
+ // Bits 23:36 of the address go into the extended address field
+ // (35:48) of the PBA slave control register by a read-modify-write
+ // operation. Note: We're using rldimi explicitly here - not an
+ // extended mnemonic - to save having to justify the data.
+
+ la A1, G_centaurConfiguration
+ ld D1, \
+ (CENTAUR_CONFIGURATION_DATA_PARMS + \
+ GPEPBAPARMS_SLVCTL_ADDRESS), \
+ A1
+ mr A1, D1 # A1 = address of the slave control register
+ ld D1, 0, A1
+ rldimi D1, D0, 64 - (35 - 23), 35, 48
+ std D1, 0, A1
+
+#if 1
+ la A1, G_ggmd_lastSlaveControl # Debug
+ std D1, 0, A1
+ mr D1, A0
+ la A1, G_ggmd_lastOciAddress
+ std D1, 0, A1
+#endif
+
+ // Clear D0 to signal success and we're out (A0 holds the OCI address)
+
+ ls D0, 0
+ ret
+ .epilogue gpe_get_mem_data
+
+/// \endcond
+
+
+////////////////////////////////////////////////////////////////////////////
+// Global Data
+////////////////////////////////////////////////////////////////////////////
+
+
+
+/// \cond
+
+ .data.pore
+
+ // Data storage for gpe_get_core_data() (file-local: none are .global)
+
+core_data_parms:
+ .quad 0 # Presumably the saved parms pointer - TODO confirm
+saved_emr:
+ .quad 0 # Presumably a saved copy of the EMR - TODO confirm
+manual_emr:
+ .quad 0 # Presumably an alternate EMR setting - TODO confirm
+hw243646:
+#if 0
+ .quad 0x3 # Determined + Required
+#else
+ .quad 0x2 # Determined + Not Required
+#endif
+
+ // Used to debug the workaround for HW280375 (users are not in this section)
+
+testHw280375Lfsr:
+ .quad 0xdeadbeef # Initial state of LFSR
+
+ // Debug/Info: Failure codes when sensor reads fail (exported, initially 0)
+
+ .global G_ggcd_coreSensorFail
+G_ggcd_coreSensorFail:
+ .quad 0
+
+ .global G_ggcd_l3SensorFail
+G_ggcd_l3SensorFail:
+ .quad 0
+
+
+ // Debug only, the last values computed by ggmdDataSetup (exported).
+
+ .global G_ggmd_lastDataAddress
+G_ggmd_lastDataAddress:
+ .quad 0 # Last PowerBus sensor cache address built
+
+ .global G_ggmd_lastSlaveControl
+G_ggmd_lastSlaveControl:
+ .quad 0 # Last PBA slave control value written
+
+ .global G_ggmd_lastOciAddress
+G_ggmd_lastOciAddress:
+ .quad 0 # Last OCI address returned in A0
+
+
+ // Required for Centaur DD1 (HW256773). Two hand-built structures:
+ //
+ // G_ggmdHw256773 : a single scomList_t entry that reads Centaur SCOM
+ // 0x02050000. gpe_get_mem_data patches the instance number into its
+ // first doubleword before each use and reads the result from the
+ // data field afterwards.
+ //
+ // G_hw256773 : the GpeScomParms structure that points to that entry
+ // and is passed (in A0) to _gpe_scom_centaur.
+ //
+ // Both symbols are exported. The .global below names the label
+ // G_ggmdHw256773 exactly so that the scomList entry really is global.
+
+ .global G_ggmdHw256773
+G_ggmdHw256773:
+ .long 0x02050000 # SCOM
+ .byte 0 # Reserved
+ .byte 0 # Error flags (output)
+ .byte 0 # Instance Number (input)
+ .byte GPE_SCOM_READ # Command
+ .quad 0 # Mask (unused)
+ .quad 0 # Data (output)
+
+ .global G_hw256773
+G_hw256773:
+ .long 0 # (32-bit addresses)
+ .long G_ggmdHw256773 # scomList
+ .long 1 # Entries in the scomList
+ .long 0 # Options
+ .long 0 # rc (output)
+ .long 0 # errorIndex (output)
+
+/// \endcond
OpenPOWER on IntegriCloud