diff options
author | Doug Gilbert <dgilbert@us.ibm.com> | 2017-11-06 11:34:31 -0600 |
---|---|---|
committer | William A. Bryan <wilbryan@us.ibm.com> | 2017-11-10 14:14:08 -0500 |
commit | ccdfc6a407fd1face276a2b04e8fc3eed8868cbd (patch) | |
tree | ad625ca7b4865e24fe07c408bef97bc9d9833593 | |
parent | 0e91ced92e0f0bd2551d925903258b7d78118956 (diff) | |
download | talos-occ-ccdfc6a407fd1face276a2b04e8fc3eed8868cbd.tar.gz talos-occ-ccdfc6a407fd1face276a2b04e8fc3eed8868cbd.zip |
OCC work-around for HW426350
Change-Id: Ica7272dc0fef3721b415fd5f72b1abf83397d341
CQ: SW407201
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/49293
Reviewed-by: Martha Broyles <mbroyles@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: William A. Bryan <wilbryan@us.ibm.com>
-rw-r--r-- | src/include/core_data.h | 17 | ||||
-rwxr-xr-x | src/occ_405/proc/proc_data.c | 12 | ||||
-rw-r--r-- | src/occ_gpe0/core_data.c | 350 |
3 files changed, 251 insertions, 128 deletions
diff --git a/src/include/core_data.h b/src/include/core_data.h index 1ffe5fe..1149000 100644 --- a/src/include/core_data.h +++ b/src/include/core_data.h @@ -75,6 +75,18 @@ #define CORE2_VDM_SMALL_DROOP 0x0000000000002000ull #define CORE3_VDM_SMALL_DROOP 0x0000000000000200ull +// return codes: +#define SIBRC_RESOURCE_OCCUPIED (1) +#define SIBRC_CORE_FENCED (2) +#define SIBRC_PARTIAL_GOOD (3) +#define SIBRC_ADDRESS_ERROR (4) +#define SIBRC_CLOCK_ERROR (5) +#define SIBRC_PACKET_ERROR (6) +#define SIBRC_TIMEOUT (7) + +#define EMPATH_VALID (1) + +#define WORKAROUND_SCOM_ADDRESS 0x10800 typedef struct { @@ -119,7 +131,8 @@ typedef struct } DroopEvents; // -// The instance of this data object must be 8 byte aligned +// The instance of this data object must be 8 byte aligned and +// size must be multiple of 8 // typedef struct // 136 bytes { @@ -129,6 +142,8 @@ typedef struct // 136 bytes CoreDataDts dts; // 8 uint64_t stop_state_hist; // 8 DroopEvents droop; // 8 + uint32_t reserved; // 4 + uint32_t empathValid; // 4 } CoreData; #ifdef __cplusplus diff --git a/src/occ_405/proc/proc_data.c b/src/occ_405/proc/proc_data.c index e8e9eba..5cefc17 100755 --- a/src/occ_405/proc/proc_data.c +++ b/src/occ_405/proc/proc_data.c @@ -190,9 +190,17 @@ void task_core_data( task_t * i_task ) //responsible for clearing the bit later on. 
G_updated_core_mask |= (CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core)); - // Presumptively clear the empath error mask - G_empath_error_core_mask &= + // set or clear the empath error mask + if(l_temp->empathValid) + { + G_empath_error_core_mask &= ~(CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core)); + } + else + { + G_empath_error_core_mask |= + (CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core)); + } } // If the core is not present, then we need to point to the empty G_core_data diff --git a/src/occ_gpe0/core_data.c b/src/occ_gpe0/core_data.c index d4346f3..7d52a75 100644 --- a/src/occ_gpe0/core_data.c +++ b/src/occ_gpe0/core_data.c @@ -32,6 +32,7 @@ #include "ppe42_msr.h" #include "ppe42_scom.h" #include "cme_register_addresses.h" +#include "pk.h" #define CME_VDSR_BASE (CME_SCOM_VDSR & 0x00ffffff) @@ -70,166 +71,265 @@ uint32_t get_core_data(uint32_t i_core, // mask off SIB errors as machine checks, return rc instead mtmsr((mfmsr() & ~(MSR_SIBRC | MSR_SIBRCA)) | MSR_SEM); + // ============== // DTS - dts_sensor_result_reg_t dts_scom_data; - - PPE_LVD(quadSelect + THERM_DTS_RESULT, value64); - dts_scom_data.value = value64; - - // Store the quad DTS readings - o_data->dts.cache[0].result = dts_scom_data.half_words.reading[0]; - o_data->dts.cache[1].result = dts_scom_data.half_words.reading[1]; - - PPE_LVD(coreSelect + THERM_DTS_RESULT, value64); - dts_scom_data.value = value64; - - o_data->dts.core[0].result = dts_scom_data.half_words.reading[0]; - o_data->dts.core[1].result = dts_scom_data.half_words.reading[1]; - - // DROOP - // Read Droop events. Event bit == 0 indicates event occurred. 
- // Side effect of read: event bits are reset to 1 (no event) in hw - // Only quad large droop and core small drop events are of interest - PPE_LVD(quadSelect + CME_VDSR_BASE, value64); - - if((value64 & CACHE_VDM_LARGE_DROOP) == 0) + // ============== + do { - ++g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD]; - } - - idx = (i_core / CORES_PER_QUAD) * CORES_PER_QUAD; + dts_sensor_result_reg_t dts_scom_data; - if((value64 & CORE0_VDM_SMALL_DROOP) == 0) - { - ++g_vdm_core_small_droop_count[idx]; - } + rc = getscom(quadSelect,THERM_DTS_RESULT, &(dts_scom_data.value)); + if (rc) + { + break; + } - if((value64 & CORE1_VDM_SMALL_DROOP) == 0) - { - ++g_vdm_core_small_droop_count[idx+1]; - } + // Store the quad DTS readings + o_data->dts.cache[0].result = dts_scom_data.half_words.reading[0]; + o_data->dts.cache[1].result = dts_scom_data.half_words.reading[1]; - if((value64 & CORE2_VDM_SMALL_DROOP) == 0) - { - ++g_vdm_core_small_droop_count[idx+2]; - } + rc = getscom(coreSelect, THERM_DTS_RESULT, &(dts_scom_data.value)); + if (rc) + { + break; + } - if((value64 & CORE3_VDM_SMALL_DROOP) == 0) - { - ++g_vdm_core_small_droop_count[idx+3]; - } + o_data->dts.core[0].result = dts_scom_data.half_words.reading[0]; + o_data->dts.core[1].result = dts_scom_data.half_words.reading[1]; + + // ============= + // DROOP + // ============= + // Read Droop events. Event bit == 0 indicates event occurred. + // Side effect of read: event bits are reset to 1 (no event) in hw + // Only quad large droop and core small droop events are of interest + rc = getscom(quadSelect, CME_VDSR_BASE, &value64); + if (rc) + { + // DROOP events are not critical. Leave event counts as zero + // and continue. + PK_TRACE("Could not read droop events! rc = %d",rc); + rc = 0; + } + else // update droop event counts + { - // return the event status for the requested core and - // corresponding quad. - // Clear the counter for only the droop events returned. 
- if(g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD] != 0) - { - o_data->droop.cache_large_event = 1; - g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD] = 0; - } + if((value64 & CACHE_VDM_LARGE_DROOP) == 0) + { + ++g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD]; + } - if(g_vdm_core_small_droop_count[i_core] != 0) - { - o_data->droop.core_small_event = 1; - g_vdm_core_small_droop_count[i_core] = 0; - } + idx = (i_core / CORES_PER_QUAD) * CORES_PER_QUAD; - // EMPATH - // Send command to select which emmpath counter to read - uint64_t empath_scom_data = CORE_RAW_CYCLES; - PPE_STVD(coreSelect + PC_OCC_SPRC, empath_scom_data) + if((value64 & CORE0_VDM_SMALL_DROOP) == 0) + { + ++g_vdm_core_small_droop_count[idx]; + } - // Read counters. - // Counter selected auto increments to the next counter after each read. + if((value64 & CORE1_VDM_SMALL_DROOP) == 0) + { + ++g_vdm_core_small_droop_count[idx+1]; + } - //CORE_RAW_CYCLES - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.raw_cycles = (uint32_t)empath_scom_data; + if((value64 & CORE2_VDM_SMALL_DROOP) == 0) + { + ++g_vdm_core_small_droop_count[idx+2]; + } - //CORE_RUN_CYCLES - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.run_cycles = (uint32_t)empath_scom_data; + if((value64 & CORE3_VDM_SMALL_DROOP) == 0) + { + ++g_vdm_core_small_droop_count[idx+3]; + } - //CORE_WORKRATE_BUSY - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.freq_sens_busy = (uint32_t)empath_scom_data; + // return the event status for the requested core and + // corresponding quad. + // Clear the counter for only the droop events returned. 
+ if(g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD] != 0) + { + o_data->droop.cache_large_event = 1; + g_vdm_cache_large_droop_count[i_core / CORES_PER_QUAD] = 0; + } - //CORE_WORKRATE_FINISH - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.freq_sens_finish = (uint32_t)empath_scom_data; + if(g_vdm_core_small_droop_count[i_core] != 0) + { + o_data->droop.core_small_event = 1; + g_vdm_core_small_droop_count[i_core] = 0; + } + } - //CORE_MEM_HIER_A_LATENCY - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.mem_latency_a = (uint32_t)empath_scom_data; + // ============= + // EMPATH + // ============= + // Send command to select which EMPATH counter to read + do + { + uint64_t empath_scom_data = CORE_RAW_CYCLES; + rc = putscom(coreSelect, PC_OCC_SPRC, empath_scom_data); + if (rc) + { + break; + } - //CORE_MEM_HIER_B_LATENCY - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.mem_latency_b = (uint32_t)empath_scom_data; + // Read counters. + // Counter selected auto increments to the next counter after each read. 
- //CORE_MEM_HIER_C_ACCESS - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->empath.mem_access_c = (uint32_t)empath_scom_data; + //CORE_RAW_CYCLES + rc = getscom(coreSelect, PC_OCC_SPRD, &empath_scom_data); + if (rc) + { + break; + } + o_data->empath.raw_cycles = (uint32_t)empath_scom_data; - int thread = 0; + //CORE_RUN_CYCLES + rc = getscom(coreSelect, PC_OCC_SPRD, &empath_scom_data); + if (rc) + { + break; + } + o_data->empath.run_cycles = (uint32_t)empath_scom_data; - for( ; thread < EMPATH_CORE_THREADS; ++thread ) - { - // THREAD_RUN_CYCLES - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->per_thread[thread].run_cycles = (uint32_t)empath_scom_data; + //CORE_WORKRATE_BUSY + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.freq_sens_busy = (uint32_t)empath_scom_data; - // THREAD_INST_DISP_UTIL - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->per_thread[thread].dispatch = (uint32_t)empath_scom_data; + //CORE_WORKRATE_FINISH + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.freq_sens_finish = (uint32_t)empath_scom_data; - // THREAD_INST_COMP_UTIL - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->per_thread[thread].completion = (uint32_t)empath_scom_data; + //CORE_MEM_HIER_A_LATENCY + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.mem_latency_a = (uint32_t)empath_scom_data; - // THREAD_MEM_HEIR_C_ACCESS - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->per_thread[thread].mem_c = (uint32_t)empath_scom_data; - } + //CORE_MEM_HIER_B_LATENCY + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.mem_latency_b = (uint32_t)empath_scom_data; - //IFU_THROTTLE_BLOCK_FETCH - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->throttle.ifu_throttle = (uint32_t)empath_scom_data; + 
//CORE_MEM_HIER_C_ACCESS + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.mem_access_c = (uint32_t)empath_scom_data; - //IFU_THROTTLE_ACTIVE - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->throttle.ifu_active = (uint32_t)empath_scom_data; + int thread = 0; - //VOLT_DROOP_THROTTLE_ACTIVE - PPE_LVD(coreSelect + PC_OCC_SPRD, empath_scom_data); - o_data->throttle.v_droop = (uint32_t)empath_scom_data; + for( ; thread < EMPATH_CORE_THREADS; ++thread ) + { + // THREAD_RUN_CYCLES + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->per_thread[thread].run_cycles = (uint32_t)empath_scom_data; + + // THREAD_INST_DISP_UTIL + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->per_thread[thread].dispatch = (uint32_t)empath_scom_data; + + // THREAD_INST_COMP_UTIL + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->per_thread[thread].completion = (uint32_t)empath_scom_data; + + // THREAD_MEM_HEIR_C_ACCESS + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->per_thread[thread].mem_c = (uint32_t)empath_scom_data; + } + if (rc) + { + break; + } - // TOD value - PPE_LVD(TOD_VALUE_REG, empath_scom_data); - o_data->empath.tod_2mhz = (uint32_t)(empath_scom_data >> 8); //[24..56] + //IFU_THROTTLE_BLOCK_FETCH + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->throttle.ifu_throttle = (uint32_t)empath_scom_data; - // STOP_STATE_HIST_OCC_REG - PPE_LVD(coreSelect + STOP_STATE_HIST_OCC_REG, empath_scom_data); - o_data->stop_state_hist = empath_scom_data; + //IFU_THROTTLE_ACTIVE + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->throttle.ifu_active = (uint32_t)empath_scom_data; - // Check rc accumulated - ignore rc == 0 - uint32_t sibrca = (mfmsr() & 0x0000007f); + 
//VOLT_DROOP_THROTTLE_ACTIVE + rc = getscom(coreSelect, PC_OCC_SPRD,&empath_scom_data); + if (rc) + { + break; + } + o_data->throttle.v_droop = (uint32_t)empath_scom_data; - if(sibrca) - { - // Report most severe error in rc - rc = 7; - uint32_t mask = 1; + // TOD value + rc = getscom_abs(TOD_VALUE_REG,&empath_scom_data); + if (rc) + { + break; + } + o_data->empath.tod_2mhz = (uint32_t)(empath_scom_data >> 8); //[24..56] - for(; mask != 0x00000080; mask <<= 1) - { - if( mask & sibrca ) + // STOP_STATE_HIST_OCC_REG + rc = getscom(coreSelect, STOP_STATE_HIST_OCC_REG, &empath_scom_data); + if (rc) { break; } + o_data->stop_state_hist = empath_scom_data; + + o_data->empathValid = EMPATH_VALID; + } while(0); // EMPATH - --rc; + if (rc) + { + // Work-around for HW problem. See SW407201 + // If ADDRESS_ERROR then perform a SCOM write of all zeros to + // 2n010800 where n is the core number. Ignore ADDRESS_ERROR + // returned. EMPATH_VALID will be left unset to indicate the + // EMPATH data is not valid, however, return SUCCESS to indicate + // the DTS data is good. + if(rc == SIBRC_ADDRESS_ERROR) + { + uint64_t zeros = 0; + putscom(coreSelect,WORKAROUND_SCOM_ADDRESS, zeros); + rc = 0; + } } - } + + } while(0); // Clear masks SIB masks (MSR_SEM) // Clear SIBRC and SIMBRCA |