diff options
-rw-r--r-- | src/include/kernel/cpuid.H | 74 | ||||
-rw-r--r-- | src/kernel/cpumgr.C | 34 | ||||
-rw-r--r-- | src/kernel/start.S | 52 |
3 files changed, 138 insertions, 22 deletions
diff --git a/src/include/kernel/cpuid.H b/src/include/kernel/cpuid.H
new file mode 100644
index 000000000..fa9b0649b
--- /dev/null
+++ b/src/include/kernel/cpuid.H
@@ -0,0 +1,74 @@
+#ifndef __KERNEL_CPUID_H
+#define __KERNEL_CPUID_H
+
+#include <arch/ppc.H>
+
+/** @enum ProcessorCoreType
+ *  @brief Enumeration of the different supported processor cores.
+ */
+enum ProcessorCoreType
+{
+    /** Base Power7 */
+    POWER7,
+    /** Power7+ */
+    POWER7_PLUS,
+
+    /** Power8 "Salerno" (low-end) core; getCpuType() has no PVR case for it */
+    POWER8_SALERNO,
+    /** Power8 "Venice" (high-end) core */
+    POWER8_VENICE,
+    /** Returned by getCpuType() when the PVR matches no case it knows */
+    UNKNOWN,
+};
+
+/** @fn getCpuType()
+ *  @brief Decode the processor type from the PVR register.
+ *
+ *  These values come from the pervasive spec for each processor.
+ *  Compares only the upper 16 bits of the PVR; unmatched values => UNKNOWN.
+ *  @return ProcessorCoreType - Value from enumeration for this core.
+ */
+ALWAYS_INLINE
+ProcessorCoreType getCpuType()
+{
+    uint64_t l_pvr = getPVR();
+
+    // Layout of the PVR is (32-bit):
+    //     2 nibbles reserved.
+    //     2 nibbles chip type.
+    //     1 nibble technology.
+    //     1 nibble major DD.
+    //     1 nibble reserved.
+    //     1 nibble minor DD.
+
+    switch(l_pvr & 0xFFFF0000)
+    {
+        case 0x003F0000:
+            return POWER7;
+
+        case 0x004A0000:
+            return POWER7_PLUS;
+
+        case 0x004B0000:
+            return POWER8_VENICE;
+
+        default:
+            return UNKNOWN;
+    }
+}
+
+/** @fn getCpuDD()
+ *  @brief Decode the processor DD level from the PVR register.
+ *
+ *  These offsets come from the pervasive spec for each processor.
+ *  Major DD is read with mask 0x0F00 and minor DD with mask 0x000F.
+ *  @return 1 byte DD level as <major nibble, minor nibble>.
+ */
+ALWAYS_INLINE
+uint8_t getCpuDD()
+{
+    uint64_t l_pvr = getPVR();
+    return static_cast<uint8_t>(((l_pvr & 0x0F00) >> 4) | (l_pvr & 0x000F));
+}
+
+#endif
diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C
index 7be1cf2dc..fc421431e 100644
--- a/src/kernel/cpumgr.C
+++ b/src/kernel/cpumgr.C
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <kernel/cpumgr.H>
 #include <kernel/task.H>
 #include <kernel/cpu.H>
@@ -9,6 +10,7 @@
 #include <arch/ppc.H>
 #include <kernel/timemgr.H>
 #include <sys/sync.h>
+#include <kernel/cpuid.H>
 
 cpu_t* CpuManager::cv_cpus[CpuManager::MAXCPUS] = { NULL };
 bool CpuManager::cv_shutdown_requested = false;
@@ -27,8 +29,36 @@ cpu_t* CpuManager::getCurrentCPU()
 
 void CpuManager::init()
 {
-    for (int i = 0; i < KERNEL_MAX_SUPPORTED_CPUS; i++)
-        Singleton<CpuManager>::instance().startCPU(i);
+    // For the initial boot we only want to set up CPU objects for the threads
+    // on this core.  Otherwise we waste memory with kernel / idle task stacks.
+    //
+    // As long as the CPU object pointer is NULL, the start.S code won't
+    // enter the kernel, so we skip initializing all the other CPUs for now.
+
+    // Determine number of threads on this core.
+    size_t threads = 0; // 0 => loop below starts nothing if kassert returns.
+    switch (getCpuType())
+    {
+        case POWER7:
+        case POWER7_PLUS:
+            threads = 4;
+            break;
+
+        case POWER8_VENICE:
+        case POWER8_SALERNO:
+            threads = 8;
+            break;
+
+        case UNKNOWN:
+        default:
+            kassert(false);
+            break;
+    }
+
+    // Create CPU objects starting at the thread-0 for this core.
+    size_t baseCpu = getPIR() & ~(threads-1); // PIR rounded down to threads.
+    for (size_t i = 0; i < threads; i++)
+        Singleton<CpuManager>::instance().startCPU(i + baseCpu);
 }
 
 void CpuManager::init_slave_smp(cpu_t* cpu)
diff --git a/src/kernel/start.S b/src/kernel/start.S
index 381e33bde..68af33c04 100644
--- a/src/kernel/start.S
+++ b/src/kernel/start.S
@@ -6,7 +6,7 @@
 _start:
     ;// Set thread priority high.
     or 3,3,3
-    
+
     ;// Determine if this is the first thread.
     li r4, 2
     ;// Read spinlock value.
@@ -30,7 +30,7 @@ pre_relocate: lis r1,0x0010 cmpl cr0,r2,r1 ;// Check LR is less than 1MB blt finished_relocate ;// No need to relocate if less than 1MB - + ;// Get addresses for relocation. ;// Write address in r5 ;// Read address in r1 @@ -39,7 +39,7 @@ pre_relocate: and r1,r1,r2 ;// and with pre_relocate's address from r2 to get start of ;// rom section. - + ;// Update LR to low address. clrldi r2,r2,48 ;// Equiv to ~(0x0FFFF) mtlr 2 @@ -47,7 +47,7 @@ pre_relocate: ;// Moving 1MB , so load r2 with (1MB / 8 bytes per word) lis r2, 0x2 mtctr r2 -relocate_loop: +relocate_loop: ;// The dcbst/sync/icbi/isync sequence comes from PowerISA ld r4, 0(r1) std r4, 0(r5) @@ -58,19 +58,19 @@ relocate_loop: addi r1,r1,8 addi r5,r5,8 bdnz+ relocate_loop - + ;// Now that we've relocated, erase exception prefix. mfmsr r11 - + rldicl r11,r11,57,1 ;// Erase bit 6 ( equiv to r11 & ~(0x40)) rotldi r11,r11,7 - + mtmsr r11 - + ;// Jump to low address. blr -finished_relocate: +finished_relocate: ;// Jump to main. b _main */ @@ -133,7 +133,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980) ;// that require C++ code for handling but also implements a fast-path for ;// some simple calls, such as reading protected SPRs. ;// -;// Since this is called from userspace as a function call (see __syscall* +;// Since this is called from userspace as a function call (see __syscall* ;// functions) we only need to honor the ELF ABI calling conventions. That ;// means some registers and condition fields can be considered volatile and ;// modified prior to being saved. 
@@ -141,7 +141,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980) .org _start + 0xC00 intvect_system_call_fast: cmpi cr0, r3, 0x0800 - bge cr0, system_call_fast_path + bge cr0, system_call_fast_path STD_INTERRUPT(system_call, 0xC08) UNIMPL_INTERRUPT(trace, 0xD00) @@ -161,7 +161,7 @@ _main: lis r2, main@h ori r2, r2, main@l ld r2,8(r2) - + ;// Set up initial stack, space for 8 double-words lis r1, kernel_stack@h ori r1, r1, kernel_stack@l @@ -173,7 +173,7 @@ _main_loop: b _main_loop ;// _other_thread_spinlock: -;// Used for threads other than first to wait for the system to boot to a +;// Used for threads other than first to wait for the system to boot to a ;// stable point where we can start the other threads. At this point ;// nothing is initalized in the thread. _other_thread_spinlock: @@ -199,17 +199,29 @@ _other_thread_spinlock_complete: muli r3, r1, 8 add r2, r3, r2 ld r3, 0(r2) ;// Load CPU object. + cmpi cr0, r3, 0 ;// Check for NULL CPU object. + beq- cr0, 1f ;// Jump to handling if no CPU object found. ld r1, 0(r3) ;// Load initial stack. - + lis r2, smp_slave_main@h ;// Load TOC base. ori r2, r2, smp_slave_main@l ld r2, 8(r2) bl smp_slave_main ;// Call smp_slave_main b _main_loop +1: + ;// No CPU object available, doze this CPU. + ;// We should only get to this point on simics. SBE will only wake up + ;// a single core / thread at a time and we are responsible for + ;// further sequencing. + doze + b 1b + + + ;// @fn kernel_save_task ;// Saves context to task structure and branches back to requested addr. - ;// + ;// ;// Requires: ;// * SPRG3 -> Task Structure. ;// * SPRG0 -> Return address. @@ -261,7 +273,7 @@ kernel_save_task: std r29, TASK_GPR_29(r1) ;// Save GPR29 std r30, TASK_GPR_30(r1) ;// Save GPR30 std r31, TASK_GPR_31(r1) ;// Save GPR31 - + ld r1, 0(r1) ;// Get CPU pointer ld r1, 0(r1) ;// Get kernel stack pointer. @@ -272,7 +284,7 @@ kernel_save_task: ;// @fn dispatch_task ;// Loads context from task structure and performs rfi. 
;// - ;// Requires: + ;// Requires: ;// * SPRG3 -> Task Structure. ;// * Current contents of registers are not needed. kernel_dispatch_task: @@ -321,7 +333,7 @@ kernel_dispatch_task: ld r27, TASK_GPR_27(r1) ;// GPR27 ld r28, TASK_LR(r1) ;// Load from context: LR, CR, CTR, XER - ld r29, TASK_CR(r1) + ld r29, TASK_CR(r1) ld r30, TASK_CTR(r1) ld r31, TASK_XER(r1) mtlr r28 ;// Restore LR @@ -334,7 +346,7 @@ kernel_dispatch_task: ld r30, TASK_GPR_30(r1) ;// GPR30 ld r31, TASK_GPR_31(r1) ;// GPR31 ld r1, TASK_GPR_1(r1) ;// GPR1 - + rfid ;// Execute task. ;// @fn system_call_fast_path @@ -366,7 +378,7 @@ system_call_fast_path: ;// jump to task entry point. Used so the kernel doesn't need to ;// dereference userspace addresses (which could be bad). ;// - ;// Requires: + ;// Requires: ;// * GPR4 -> Function pointer. ;// * LR -> task_end stub. ;// * GPR3 -> Task argument. |