diff options
author | Patrick Williams <iawillia@us.ibm.com> | 2011-07-15 15:35:41 -0500 |
---|---|---|
committer | A. Patrick Williams III <iawillia@us.ibm.com> | 2011-07-25 15:47:45 -0500 |
commit | b679a1729d7aea0870544e886ddb4b03e1ecf4c4 (patch) | |
tree | 8b945be92f740a9719a70465df5d8ceb635f5f95 /src | |
parent | ff7d0cf68fab49bb965467bcd9a6a8068e799349 (diff) | |
download | talos-hostboot-b679a1729d7aea0870544e886ddb4b03e1ecf4c4.tar.gz talos-hostboot-b679a1729d7aea0870544e886ddb4b03e1ecf4c4.zip |
Reduce working threads to first core.
This is required to reduce the memory footprint of the kernel
so we can fit within 2MB. This patch will cause (in simics)
all other cores/threads to execute a 'doze' instruction and
cease executing. In VBU, only 1 core will be active anyhow.
Change-Id: If1bdc01393b02d802ba7595a88dcf3331efc2d4e
Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/203
Tested-by: Jenkins Server
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: Douglas R. Gilbert <dgilbert@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/include/kernel/cpuid.H | 74 | ||||
-rw-r--r-- | src/kernel/cpumgr.C | 34 | ||||
-rw-r--r-- | src/kernel/start.S | 52 |
3 files changed, 138 insertions, 22 deletions
diff --git a/src/include/kernel/cpuid.H b/src/include/kernel/cpuid.H new file mode 100644 index 000000000..fa9b0649b --- /dev/null +++ b/src/include/kernel/cpuid.H @@ -0,0 +1,74 @@ +#ifndef __KERNEL_CPUID_H +#define __KERNEL_CPUID_H + +#include <arch/ppc.H> + +/** @enum ProcessorCoreType + * @brief Enumeration of the different supported processor cores. + */ +enum ProcessorCoreType +{ + /** Base Power7 */ + POWER7, + /** Power7+ */ + POWER7_PLUS, + + /** Power8 "Salerno" (low-end) core */ + POWER8_SALERNO, + /** Power8 "Venice" (high-end) core */ + POWER8_VENICE, + + UNKNOWN, +}; + +/** @fn getCpuType() + * @brief Decode the processor type from the PVR register. + * + * These values come from the pervasive spec for each processor. + * + * @return ProcessorCoreType - Value from enumeration for this core. + */ +ALWAYS_INLINE +ProcessorCoreType getCpuType() +{ + uint64_t l_pvr = getPVR(); + + // Layout of the PVR is (32-bit): + // 2 nibbles reserved. + // 2 nibbles chip type. + // 1 nibble technology. + // 1 nibble major DD. + // 1 nibble reserved. + // 1 nibble minor DD. + + switch(l_pvr & 0xFFFF0000) + { + case 0x003F0000: + return POWER7; + + case 0x004A0000: + return POWER7_PLUS; + + case 0x004B0000: + return POWER8_VENICE; + + default: + return UNKNOWN; + } +} + +/** @fn getCpuDD + * @brief Decode the processor DD level from the PVR register. + * + * These offsets come from the pervasive spec for each processor. + * + * @return 1 byte DD level as <major nibble, minor nibble>. + */ +ALWAYS_INLINE +uint8_t getCpuDD() +{ + uint64_t l_pvr = getPVR(); + return ((l_pvr & 0x0F00) >> 4) | (l_pvr & 0x000F); +} + +#endif diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C index 7be1cf2dc..fc421431e 100644 --- a/src/kernel/cpumgr.C +++ b/src/kernel/cpumgr.C @@ -1,3 +1,4 @@ +#include <assert.h> #include <kernel/cpumgr.H> #include <kernel/task.H> #include <kernel/cpu.H> @@ -9,6 +10,7 @@ #include <arch/ppc.H> #include <kernel/timemgr.H> #include <sys/sync.h> +#include <kernel/cpuid.H> cpu_t* CpuManager::cv_cpus[CpuManager::MAXCPUS] = { NULL }; bool CpuManager::cv_shutdown_requested = false; @@ -27,8 +29,36 @@ cpu_t* CpuManager::getCurrentCPU() void CpuManager::init() { - for (int i = 0; i < KERNEL_MAX_SUPPORTED_CPUS; i++) - Singleton<CpuManager>::instance().startCPU(i); + // For the initial boot we only want to set up CPU objects for the threads + // on this core. Otherwise we waste memory with kernel / idle task stacks. + // + // As long as the CPU object pointer is NULL, the start.S code won't + // enter the kernel, so we skip initializing all the other CPUs for now. + + // Determine number of threads on this core. + size_t threads = -1; + switch (getCpuType()) + { + case POWER7: + case POWER7_PLUS: + threads = 4; + break; + + case POWER8_VENICE: + case POWER8_SALERNO: + threads = 8; + break; + + case UNKNOWN: + default: + kassert(false); + break; + } + + // Create CPU objects starting at the thread-0 for this core. + size_t baseCpu = getPIR() & ~(threads-1); + for (size_t i = 0; i < threads; i++) + Singleton<CpuManager>::instance().startCPU(i + baseCpu); } void CpuManager::init_slave_smp(cpu_t* cpu) diff --git a/src/kernel/start.S b/src/kernel/start.S index 381e33bde..68af33c04 100644 --- a/src/kernel/start.S +++ b/src/kernel/start.S @@ -6,7 +6,7 @@ _start: ;// Set thread priority high. or 3,3,3 - + ;// Determine if this is the first thread. li r4, 2 ;// Read spinlock value. @@ -30,7 +30,7 @@ pre_relocate: lis r1,0x0010 cmpl cr0,r2,r1 ;// Check LR is less than 1MB blt finished_relocate ;// No need to relocate if less than 1MB - + ;// Get addresses for relocation. ;// Write address in r5 ;// Read address in r1 @@ -39,7 +39,7 @@ pre_relocate: and r1,r1,r2 ;// and with pre_relocate's address from r2 to get start of ;// rom section. - + ;// Update LR to low address. clrldi r2,r2,48 ;// Equiv to ~(0x0FFFF) mtlr 2 @@ -47,7 +47,7 @@ pre_relocate: ;// Moving 1MB , so load r2 with (1MB / 8 bytes per word) lis r2, 0x2 mtctr r2 -relocate_loop: +relocate_loop: ;// The dcbst/sync/icbi/isync sequence comes from PowerISA ld r4, 0(r1) std r4, 0(r5) @@ -58,19 +58,19 @@ relocate_loop: addi r1,r1,8 addi r5,r5,8 bdnz+ relocate_loop - + ;// Now that we've relocated, erase exception prefix. mfmsr r11 - + rldicl r11,r11,57,1 ;// Erase bit 6 ( equiv to r11 & ~(0x40)) rotldi r11,r11,7 - + mtmsr r11 - + ;// Jump to low address. blr -finished_relocate: +finished_relocate: ;// Jump to main. b _main */ @@ -133,7 +133,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980) ;// that require C++ code for handling but also implements a fast-path for ;// some simple calls, such as reading protected SPRs. ;// -;// Since this is called from userspace as a function call (see __syscall* +;// Since this is called from userspace as a function call (see __syscall* ;// functions) we only need to honor the ELF ABI calling conventions. That ;// means some registers and condition fields can be considered volatile and ;// modified prior to being saved. @@ -141,7 +141,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980) .org _start + 0xC00 intvect_system_call_fast: cmpi cr0, r3, 0x0800 - bge cr0, system_call_fast_path + bge cr0, system_call_fast_path STD_INTERRUPT(system_call, 0xC08) UNIMPL_INTERRUPT(trace, 0xD00) @@ -161,7 +161,7 @@ _main: lis r2, main@h ori r2, r2, main@l ld r2,8(r2) - + ;// Set up initial stack, space for 8 double-words lis r1, kernel_stack@h ori r1, r1, kernel_stack@l @@ -173,7 +173,7 @@ _main_loop: b _main_loop ;// _other_thread_spinlock: -;// Used for threads other than first to wait for the system to boot to a +;// Used for threads other than first to wait for the system to boot to a ;// stable point where we can start the other threads. At this point ;// nothing is initalized in the thread. _other_thread_spinlock: @@ -199,17 +199,29 @@ _other_thread_spinlock_complete: muli r3, r1, 8 add r2, r3, r2 ld r3, 0(r2) ;// Load CPU object. + cmpi cr0, r3, 0 ;// Check for NULL CPU object. + beq- cr0, 1f ;// Jump to handling if no CPU object found. ld r1, 0(r3) ;// Load initial stack. - + lis r2, smp_slave_main@h ;// Load TOC base. ori r2, r2, smp_slave_main@l ld r2, 8(r2) bl smp_slave_main ;// Call smp_slave_main b _main_loop +1: + ;// No CPU object available, doze this CPU. + ;// We should only get to this point on simics. SBE will only wake up + ;// a single core / thread at a time and we are responsible for + ;// further sequencing. + doze + b 1b + + + ;// @fn kernel_save_task ;// Saves context to task structure and branches back to requested addr. - ;// + ;// ;// Requires: ;// * SPRG3 -> Task Structure. ;// * SPRG0 -> Return address. @@ -261,7 +273,7 @@ kernel_save_task: std r29, TASK_GPR_29(r1) ;// Save GPR29 std r30, TASK_GPR_30(r1) ;// Save GPR30 std r31, TASK_GPR_31(r1) ;// Save GPR31 - + ld r1, 0(r1) ;// Get CPU pointer ld r1, 0(r1) ;// Get kernel stack pointer. @@ -272,7 +284,7 @@ kernel_save_task: ;// @fn dispatch_task ;// Loads context from task structure and performs rfi. ;// - ;// Requires: + ;// Requires: ;// * SPRG3 -> Task Structure. ;// * Current contents of registers are not needed. kernel_dispatch_task: @@ -321,7 +333,7 @@ kernel_dispatch_task: ld r27, TASK_GPR_27(r1) ;// GPR27 ld r28, TASK_LR(r1) ;// Load from context: LR, CR, CTR, XER - ld r29, TASK_CR(r1) + ld r29, TASK_CR(r1) ld r30, TASK_CTR(r1) ld r31, TASK_XER(r1) mtlr r28 ;// Restore LR @@ -334,7 +346,7 @@ kernel_dispatch_task: ld r30, TASK_GPR_30(r1) ;// GPR30 ld r31, TASK_GPR_31(r1) ;// GPR31 ld r1, TASK_GPR_1(r1) ;// GPR1 - + rfid ;// Execute task. ;// @fn system_call_fast_path @@ -366,7 +378,7 @@ system_call_fast_path: ;// jump to task entry point. Used so the kernel doesn't need to ;// dereference userspace addresses (which could be bad). ;// - ;// Requires: + ;// Requires: ;// * GPR4 -> Function pointer. ;// * LR -> task_end stub. ;// * GPR3 -> Task argument. |