summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/include/kernel/cpuid.H74
-rw-r--r--src/kernel/cpumgr.C34
-rw-r--r--src/kernel/start.S52
3 files changed, 138 insertions, 22 deletions
diff --git a/src/include/kernel/cpuid.H b/src/include/kernel/cpuid.H
new file mode 100644
index 000000000..fa9b0649b
--- /dev/null
+++ b/src/include/kernel/cpuid.H
@@ -0,0 +1,74 @@
+#ifndef __KERNEL_CPUID_H
+#define __KERNEL_CPUID_H
+
+#include <arch/ppc.H>
+
+/** @enum ProcessorCoreType
+ * @brief Enumeration of the different supported processor cores.
+ *
+ * getCpuType() returns one of these values after decoding the PVR.
+ * UNKNOWN is its fallback for a PVR that matches no recognized core.
+ */
+enum ProcessorCoreType
+{
+ /** Base Power7 */
+ POWER7,
+ /** Power7+ */
+ POWER7_PLUS,
+
+ /** Power8 "Salerno" (low-end) core */
+ POWER8_SALERNO,
+ /** Power8 "Venice" (high-end) core */
+ POWER8_VENICE,
+
+ UNKNOWN, /**< Core type could not be identified. */
+};
+
+/** @fn getCpuType()
+ * @brief Decode the processor type from the PVR register.
+ *
+ * These values come from the pervasive spec for each processor.
+ *
+ * Only the upper 16 bits of the 32-bit PVR (the reserved and chip-type
+ * nibbles) are compared; the technology and DD-level nibbles are ignored.
+ *
+ * @note No PVR value is mapped to POWER8_SALERNO in this function, so it
+ * never returns that enumerator.
+ *
+ * @return ProcessorCoreType - Value from enumeration for this core, or
+ * UNKNOWN if the PVR does not match a recognized core.
+ */
+ALWAYS_INLINE
+ProcessorCoreType getCpuType()
+{
+ uint64_t l_pvr = getPVR();
+
+ // Layout of the PVR is (32-bit), listed from the most-significant nibble:
+ // 2 nibbles reserved.
+ // 2 nibbles chip type.
+ // 1 nibble technology.
+ // 1 nibble major DD.
+ // 1 nibble reserved.
+ // 1 nibble minor DD.
+
+ switch(l_pvr & 0xFFFF0000) // Keep only the reserved + chip-type nibbles.
+ {
+ case 0x003F0000:
+ return POWER7;
+
+ case 0x004A0000:
+ return POWER7_PLUS;
+
+ case 0x004B0000:
+ return POWER8_VENICE;
+
+ default: // Unrecognized chip type.
+ return UNKNOWN;
+ }
+}
+
+/** @fn getCpuDD
+ * @brief Decode the processor DD level from the PVR register.
+ *
+ * These offsets come from the pervasive spec for each processor.
+ *
+ * The major DD nibble (PVR mask 0x0F00) is shifted into the upper nibble
+ * of the result and the minor DD nibble (PVR mask 0x000F) becomes the
+ * lower nibble; e.g. a PVR whose low 16 bits are 0x0201 yields 0x21.
+ *
+ * @return 1 byte DD level as <major nibble, minor nibble>.
+ */
+ALWAYS_INLINE
+uint8_t getCpuDD()
+{
+ uint64_t l_pvr = getPVR();
+ return ((l_pvr & 0x0F00) >> 4) | (l_pvr & 0x000F); // <major, minor>
+}
+
+#endif
diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C
index 7be1cf2dc..fc421431e 100644
--- a/src/kernel/cpumgr.C
+++ b/src/kernel/cpumgr.C
@@ -1,3 +1,4 @@
+#include <assert.h>
#include <kernel/cpumgr.H>
#include <kernel/task.H>
#include <kernel/cpu.H>
@@ -9,6 +10,7 @@
#include <arch/ppc.H>
#include <kernel/timemgr.H>
#include <sys/sync.h>
+#include <kernel/cpuid.H>
cpu_t* CpuManager::cv_cpus[CpuManager::MAXCPUS] = { NULL };
bool CpuManager::cv_shutdown_requested = false;
@@ -27,8 +29,36 @@ cpu_t* CpuManager::getCurrentCPU()
void CpuManager::init()
{
- for (int i = 0; i < KERNEL_MAX_SUPPORTED_CPUS; i++)
- Singleton<CpuManager>::instance().startCPU(i);
+ // For the initial boot we only want to set up CPU objects for the threads
+ // on this core. Otherwise we waste memory with kernel / idle task stacks.
+ //
+ // As long as the CPU object pointer is NULL, the start.S code won't
+ // enter the kernel, so we skip initializing all the other CPUs for now.
+
+ // Determine number of threads on this core from the decoded core type.
+ size_t threads = -1; // Placeholder; overwritten for every recognized core type.
+ switch (getCpuType())
+ {
+ case POWER7:
+ case POWER7_PLUS:
+ threads = 4;
+ break;
+
+ case POWER8_VENICE:
+ case POWER8_SALERNO:
+ threads = 8;
+ break;
+
+ case UNKNOWN:
+ default:
+ kassert(false); // No thread count known (presumably fatal -- confirm kassert semantics).
+ break;
+ }
+
+ // Create CPU objects starting at the thread-0 for this core.
+ size_t baseCpu = getPIR() & ~(threads-1); // threads is 4 or 8, so this rounds the PIR down to a multiple of threads.
+ for (size_t i = 0; i < threads; i++)
+ Singleton<CpuManager>::instance().startCPU(i + baseCpu);
}
void CpuManager::init_slave_smp(cpu_t* cpu)
diff --git a/src/kernel/start.S b/src/kernel/start.S
index 381e33bde..68af33c04 100644
--- a/src/kernel/start.S
+++ b/src/kernel/start.S
@@ -6,7 +6,7 @@
_start:
;// Set thread priority high.
or 3,3,3
-
+
;// Determine if this is the first thread.
li r4, 2
;// Read spinlock value.
@@ -30,7 +30,7 @@ pre_relocate:
lis r1,0x0010
cmpl cr0,r2,r1 ;// Check LR is less than 1MB
blt finished_relocate ;// No need to relocate if less than 1MB
-
+
;// Get addresses for relocation.
;// Write address in r5
;// Read address in r1
@@ -39,7 +39,7 @@ pre_relocate:
and r1,r1,r2 ;// and with pre_relocate's address from r2 to get start of
;// rom section.
-
+
;// Update LR to low address.
clrldi r2,r2,48 ;// Equiv to ~(0x0FFFF)
mtlr 2
@@ -47,7 +47,7 @@ pre_relocate:
;// Moving 1MB , so load r2 with (1MB / 8 bytes per word)
lis r2, 0x2
mtctr r2
-relocate_loop:
+relocate_loop:
;// The dcbst/sync/icbi/isync sequence comes from PowerISA
ld r4, 0(r1)
std r4, 0(r5)
@@ -58,19 +58,19 @@ relocate_loop:
addi r1,r1,8
addi r5,r5,8
bdnz+ relocate_loop
-
+
;// Now that we've relocated, erase exception prefix.
mfmsr r11
-
+
rldicl r11,r11,57,1 ;// Erase bit 6 ( equiv to r11 & ~(0x40))
rotldi r11,r11,7
-
+
mtmsr r11
-
+
;// Jump to low address.
blr
-finished_relocate:
+finished_relocate:
;// Jump to main.
b _main
*/
@@ -133,7 +133,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980)
;// that require C++ code for handling but also implements a fast-path for
;// some simple calls, such as reading protected SPRs.
;//
-;// Since this is called from userspace as a function call (see __syscall*
+;// Since this is called from userspace as a function call (see __syscall*
;// functions) we only need to honor the ELF ABI calling conventions. That
;// means some registers and condition fields can be considered volatile and
;// modified prior to being saved.
@@ -141,7 +141,7 @@ UNIMPL_INTERRUPT(hype_decrementer, 0x980)
.org _start + 0xC00
intvect_system_call_fast:
cmpi cr0, r3, 0x0800
- bge cr0, system_call_fast_path
+ bge cr0, system_call_fast_path
STD_INTERRUPT(system_call, 0xC08)
UNIMPL_INTERRUPT(trace, 0xD00)
@@ -161,7 +161,7 @@ _main:
lis r2, main@h
ori r2, r2, main@l
ld r2,8(r2)
-
+
;// Set up initial stack, space for 8 double-words
lis r1, kernel_stack@h
ori r1, r1, kernel_stack@l
@@ -173,7 +173,7 @@ _main_loop:
b _main_loop
;// _other_thread_spinlock:
-;// Used for threads other than first to wait for the system to boot to a
+;// Used for threads other than first to wait for the system to boot to a
;// stable point where we can start the other threads. At this point
;// nothing is initialized in the thread.
_other_thread_spinlock:
@@ -199,17 +199,29 @@ _other_thread_spinlock_complete:
muli r3, r1, 8
add r2, r3, r2
ld r3, 0(r2) ;// Load CPU object.
+ cmpi cr0, r3, 0 ;// Check for NULL CPU object.
+ beq- cr0, 1f ;// Jump to handling if no CPU object found.
ld r1, 0(r3) ;// Load initial stack.
-
+
lis r2, smp_slave_main@h ;// Load TOC base.
ori r2, r2, smp_slave_main@l
ld r2, 8(r2)
bl smp_slave_main ;// Call smp_slave_main
b _main_loop
+1:
+ ;// No CPU object available, doze this CPU.
+ ;// We should only get to this point on simics. SBE will only wake up
+ ;// a single core / thread at a time and we are responsible for
+ ;// further sequencing.
+ doze
+ b 1b
+
+
+
;// @fn kernel_save_task
;// Saves context to task structure and branches back to requested addr.
- ;//
+ ;//
;// Requires:
;// * SPRG3 -> Task Structure.
;// * SPRG0 -> Return address.
@@ -261,7 +273,7 @@ kernel_save_task:
std r29, TASK_GPR_29(r1) ;// Save GPR29
std r30, TASK_GPR_30(r1) ;// Save GPR30
std r31, TASK_GPR_31(r1) ;// Save GPR31
-
+
ld r1, 0(r1) ;// Get CPU pointer
ld r1, 0(r1) ;// Get kernel stack pointer.
@@ -272,7 +284,7 @@ kernel_save_task:
;// @fn dispatch_task
;// Loads context from task structure and performs rfi.
;//
- ;// Requires:
+ ;// Requires:
;// * SPRG3 -> Task Structure.
;// * Current contents of registers are not needed.
kernel_dispatch_task:
@@ -321,7 +333,7 @@ kernel_dispatch_task:
ld r27, TASK_GPR_27(r1) ;// GPR27
ld r28, TASK_LR(r1) ;// Load from context: LR, CR, CTR, XER
- ld r29, TASK_CR(r1)
+ ld r29, TASK_CR(r1)
ld r30, TASK_CTR(r1)
ld r31, TASK_XER(r1)
mtlr r28 ;// Restore LR
@@ -334,7 +346,7 @@ kernel_dispatch_task:
ld r30, TASK_GPR_30(r1) ;// GPR30
ld r31, TASK_GPR_31(r1) ;// GPR31
ld r1, TASK_GPR_1(r1) ;// GPR1
-
+
rfid ;// Execute task.
;// @fn system_call_fast_path
@@ -366,7 +378,7 @@ system_call_fast_path:
;// jump to task entry point. Used so the kernel doesn't need to
;// dereference userspace addresses (which could be bad).
;//
- ;// Requires:
+ ;// Requires:
;// * GPR4 -> Function pointer.
;// * LR -> task_end stub.
;// * GPR3 -> Task argument.
OpenPOWER on IntegriCloud