Diffstat (limited to 'src/kernel')
-rw-r--r--  src/kernel/cpumgr.C    | 101
-rw-r--r--  src/kernel/deferred.C  | 268
-rw-r--r--  src/kernel/exception.C |  30
-rw-r--r--  src/kernel/kernel.C    |  47
-rw-r--r--  src/kernel/makefile    |   8
-rw-r--r--  src/kernel/misc.C      |  82
-rw-r--r--  src/kernel/start.S     | 181
-rw-r--r--  src/kernel/syscall.C   |  53
8 files changed, 618 insertions, 152 deletions
diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C
index 943ef48fd..909661cca 100644
--- a/src/kernel/cpumgr.C
+++ b/src/kernel/cpumgr.C
@@ -38,13 +38,13 @@
#include <kernel/heapmgr.H>
#include <kernel/intmsghandler.H>
#include <errno.h>
+#include <kernel/deferred.H>
+#include <kernel/misc.H>
cpu_t** CpuManager::cv_cpus = NULL;
bool CpuManager::cv_shutdown_requested = false;
uint64_t CpuManager::cv_shutdown_status = 0;
-Barrier CpuManager::cv_barrier;
-bool CpuManager::cv_defrag = false;
-size_t CpuManager::cv_cpuCount = 0;
+size_t CpuManager::cv_cpuSeq = 0;
bool CpuManager::cv_forcedMemPeriodic = false;
InteractiveDebug CpuManager::cv_interactive_debug;
@@ -95,6 +95,35 @@ void CpuManager::requestShutdown(uint64_t i_status)
cv_shutdown_status = i_status;
__sync_synchronize();
cv_shutdown_requested = true;
+
+ class ExecuteShutdown : public DeferredWork
+ {
+ public:
+ void masterPreWork()
+ {
+ // The stats can be retrieved from global variables as needed.
+ // This can be enabled for debug if desired.
+ #ifdef __MEMSTATS__
+ HeapManager::stats(); // masterPreWork only runs on the master CPU.
+ #endif
+ }
+
+ void activeMainWork()
+ {
+ KernelMisc::shutdown();
+ }
+
+ void nonactiveMainWork()
+ {
+ // Something wasn't synchronized correctly if we got here:
+ // CPUs should not be coming online while a shutdown is
+ // being executed.
+ kassert(false);
+ }
+ };
+
+ DeferredQueue::insert(new ExecuteShutdown());
}
void CpuManager::startCPU(ssize_t i)
@@ -163,8 +192,20 @@ void CpuManager::activateCPU(cpu_t * i_cpu)
{
// Set active.
i_cpu->active = true;
- __sync_add_and_fetch(&cv_cpuCount, 1);
- lwsync();
+
+ // Update sequence ID.
+ do
+ {
+ uint64_t old_seq = cv_cpuSeq;
+ i_cpu->cpu_start_seqid = old_seq + 1 + (1ull << 32);
+
+ if (__sync_bool_compare_and_swap(&cv_cpuSeq, old_seq,
+ i_cpu->cpu_start_seqid))
+ {
+ break;
+ }
+ } while (1);
+ i_cpu->cpu_start_seqid >>= 32; // Keep only the sequence-number half.
// Verify / set SPRs.
uint64_t msr = getMSR();
@@ -172,6 +213,24 @@ void CpuManager::activateCPU(cpu_t * i_cpu)
setLPCR(WAKEUP_LPCR_VALUE);
}
+void CpuManager::deactivateCPU(cpu_t * i_cpu)
+{
+ // Set inactive.
+ i_cpu->active = false;
+
+ // Update sequence ID.
+ do
+ {
+ uint64_t old_seq = cv_cpuSeq;
+ uint64_t new_seq = old_seq - 1 + (1ull << 32);
+
+ if (__sync_bool_compare_and_swap(&cv_cpuSeq, old_seq, new_seq))
+ {
+ break;
+ }
+ } while(1);
+}
+
void CpuManager::executePeriodics(cpu_t * i_cpu)
{
if(i_cpu->master)
@@ -211,32 +270,22 @@ void CpuManager::executePeriodics(cpu_t * i_cpu)
if((0 == (i_cpu->periodic_count % CPU_PERIODIC_DEFRAG)) ||
(forceMemoryPeriodic))
{
- // set up barrier based on # cpus cv_barrier;
- // TODO whatif other cpus become active?
- isync(); // Ensure all instructions complete before this point, so
- // we don't get a stale shutdown_requested.
- if(!cv_shutdown_requested)
+ class MemoryCoalesce : public DeferredWork
{
- cv_barrier.init(cv_cpuCount);
- lwsync(); // Ensure barrier init is globally visible before
- // setting defrag = true.
- cv_defrag = true;
- }
+ public:
+ void masterPreWork()
+ {
+ HeapManager::coalesce();
+ PageManager::coalesce();
+ }
+ };
+
+ DeferredQueue::insert(new MemoryCoalesce());
}
}
- if(cv_defrag)
- {
- cv_barrier.wait();
- if(i_cpu->master)
- {
- HeapManager::coalesce();
- PageManager::coalesce();
- cv_defrag = false;
- }
+ DeferredQueue::execute();
- cv_barrier.wait();
- }
}
int CpuManager::startCore(uint64_t pir)
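The activate/deactivate pair above packs two fields into the single word cv_cpuSeq: the active-CPU count in the low 32 bits and a generation number in the high 32 bits, both updated by one compare-and-swap so a reader can snapshot count and generation together. A minimal standalone sketch of the scheme (function names here are illustrative, not from the patch):

    #include <stdint.h>

    // Low 32 bits: active CPU count.  High 32 bits: generation number,
    // bumped on every activate and deactivate so any change is visible.
    static uint64_t seq_word = 0;

    static uint64_t cpu_activate()   // returns the new generation
    {
        uint64_t old_val, new_val;
        do
        {
            old_val = seq_word;
            new_val = old_val + 1 + (1ull << 32);  // count+1, generation+1
        } while (!__sync_bool_compare_and_swap(&seq_word, old_val, new_val));
        return new_val >> 32;
    }

    static void cpu_deactivate()
    {
        uint64_t old_val, new_val;
        do
        {
            old_val = seq_word;
            new_val = old_val - 1 + (1ull << 32);  // count-1, generation+1
        } while (!__sync_bool_compare_and_swap(&seq_word, old_val, new_val));
    }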
diff --git a/src/kernel/deferred.C b/src/kernel/deferred.C
new file mode 100644
index 000000000..dc6494857
--- /dev/null
+++ b/src/kernel/deferred.C
@@ -0,0 +1,268 @@
+/* IBM_PROLOG_BEGIN_TAG
+ * This is an automatically generated prolog.
+ *
+ * $Source: src/kernel/deferred.C $
+ *
+ * IBM CONFIDENTIAL
+ *
+ * COPYRIGHT International Business Machines Corp. 2012
+ *
+ * p1
+ *
+ * Object Code Only (OCO) source materials
+ * Licensed Internal Code Source Materials
+ * IBM HostBoot Licensed Internal Code
+ *
+ * The source code for this program is not published or other-
+ * wise divested of its trade secrets, irrespective of what has
+ * been deposited with the U.S. Copyright Office.
+ *
+ * Origin: 30
+ *
+ * IBM_PROLOG_END_TAG
+ */
+#include <kernel/deferred.H>
+#include <kernel/cpumgr.H>
+#include <util/singleton.H>
+#include <assert.h>
+#include <arch/ppc.H>
+
+/** Extract the DeferredWork pointer part of an iv_cpus_and_next instance var. */
+#define DEFERRED_QUEUE_GET_NEXT_PTR(item) \
+ reinterpret_cast<DeferredWork*>((item)->iv_cpus_and_next & 0xFFFFFFFF)
+
+/** Set the DeferredWork pointer part of an iv_cpus_and_next instance var. */
+#define DEFERRED_QUEUE_SET_NEXT_PTR(item, ptr) \
+ (item)->iv_cpus_and_next = \
+ ((item)->iv_cpus_and_next & 0xFFFFFFFF00000000ull) | \
+ (reinterpret_cast<uint64_t>(ptr) & 0xFFFFFFFF)
+
+/** Extract the CPU count portion of an iv_cpus_and_next instance var. */
+#define DEFERRED_QUEUE_GET_CPU_COUNT(item) ((item)->iv_cpus_and_next >> 32)
+
+// Initialize the work queue.
+DeferredQueue::DeferredQueue() : lock(), iv_cpus_and_next(0) {}
+
+DeferredQueue::~DeferredQueue()
+{
+ // Ensure that all work is completed.
+ kassert(0 == iv_cpus_and_next);
+}
+
+void DeferredQueue::insert(DeferredWork* i_work)
+{
+ // Call singleton insert.
+ Singleton<DeferredQueue>::instance()._insert(i_work);
+}
+
+void DeferredQueue::execute()
+{
+ // Call singleton execute.
+ Singleton<DeferredQueue>::instance()._execute();
+}
+
+void DeferredQueue::_insert(DeferredWork* i_work)
+{
+ lock.lock();
+
+ // NULL pointer implies empty, so just add work item.
+ if (0 == iv_cpus_and_next)
+ {
+ iv_cpus_and_next = reinterpret_cast<uint64_t>(i_work);
+ }
+ else
+ {
+ // Follow linked list to last work item.
+ DeferredWork* tail = DEFERRED_QUEUE_GET_NEXT_PTR(this);
+ while (NULL != DEFERRED_QUEUE_GET_NEXT_PTR(tail))
+ {
+ tail = DEFERRED_QUEUE_GET_NEXT_PTR(tail);
+ }
+
+ // Add work item to the end of the list.
+ DEFERRED_QUEUE_SET_NEXT_PTR(tail, i_work);
+ }
+
+ lock.unlock();
+}
+
+void DeferredQueue::_execute()
+{
+ uint64_t cpus_and_next = 0;
+
+ // Increment the CPU count for pointer references.
+ do
+ {
+ cpus_and_next = iv_cpus_and_next;
+
+ if (0 == cpus_and_next) // No work to execute.
+ {
+ return;
+ }
+
+ } while(!__sync_bool_compare_and_swap(&iv_cpus_and_next,
+ cpus_and_next,
+ cpus_and_next + (1ull << 32)));
+
+ // Extract the item pointer.
+ DeferredWork* item =
+ reinterpret_cast<DeferredWork*>(cpus_and_next & 0xFFFFFFFF);
+
+ // Execute the extracted item.
+ item->start();
+}
+
+void DeferredQueue::_complete(DeferredWork* i_work)
+{
+ lock.lock();
+
+ // Update list-head to pop item off.
+ uint64_t new_ptr =
+ reinterpret_cast<uint64_t>(DEFERRED_QUEUE_GET_NEXT_PTR(i_work));
+ uint64_t old_ptr = 0;
+
+ do
+ {
+ old_ptr = iv_cpus_and_next;
+ } while(!__sync_bool_compare_and_swap(&iv_cpus_and_next, old_ptr, new_ptr));
+
+ // Get the CPU count from the old object pointer and wait until those
+ // CPUs get into i_work.
+ old_ptr >>= 32;
+ while (DEFERRED_QUEUE_GET_CPU_COUNT(i_work) != old_ptr)
+ {
+ setThreadPriorityLow();
+ }
+ setThreadPriorityHigh();
+
+ lock.unlock();
+}
+
+DeferredWork::DeferredWork() : iv_barrier(), iv_cpus_and_next(0),
+ iv_activeSeqId(0),
+ iv_releasePre(false), iv_releasePost(false)
+{
+ uint32_t cpuCount;
+
+ // Read the current CPU count and sequence number.
+ CpuManager::getCpuCountAndSeqId(cpuCount, iv_activeSeqId);
+ // Initialize the barrier with the number of active CPUs.
+ iv_barrier.init(cpuCount);
+}
+
+DeferredWork::~DeferredWork()
+{
+ // Ensure the work item was removed from the queue chain and no
+ // CPUs are still inside it.
+ kassert(0 == iv_cpus_and_next);
+}
+
+void DeferredWork::start()
+{
+ // Increment object reference count.
+ __sync_add_and_fetch(&iv_cpus_and_next, 1ull << 32);
+
+ // Get our CPU object and determine if we were active when the item
+ // was created (our sequence # is less than or equal to the item's).
+ cpu_t* cpu = CpuManager::getCurrentCPU();
+ bool active = cpu->cpu_start_seqid <= iv_activeSeqId;
+
+ // Synchronize active CPUs.
+ if (active)
+ {
+ _waitForCpus();
+ }
+
+ // Call masterPre step.
+ if (cpu->master)
+ {
+ _masterPre();
+ }
+ else
+ {
+ _waitAtPre();
+ }
+
+ // Call MainWork step.
+ if (active)
+ {
+ activeMainWork();
+ _waitForCpus();
+ }
+ else
+ {
+ nonactiveMainWork();
+ }
+
+ // Call masterPost step.
+ if (cpu->master)
+ {
+ _masterPost();
+ }
+ else
+ {
+ _waitAtPost();
+ }
+
+ // Release reference to this object.
+ _cleanup();
+}
+
+void DeferredWork::_waitForCpus()
+{
+ iv_barrier.wait();
+}
+
+void DeferredWork::_masterPre()
+{
+ masterPreWork();
+
+ // Ensure memory ops are globally visible before releasing all CPUs.
+ lwsync();
+ iv_releasePre = true;
+}
+
+void DeferredWork::_waitAtPre()
+{
+ while(!iv_releasePre)
+ {
+ setThreadPriorityLow();
+ }
+ isync(); // Prevent spec. execution past this point until released.
+ setThreadPriorityHigh();
+}
+
+void DeferredWork::_masterPost()
+{
+ masterPostWork();
+
+ // Remove ourself from the queue chain now.
+ Singleton<DeferredQueue>::instance()._complete(this);
+
+ // Ensure memory ops are globally visible before releasing all CPUs.
+ lwsync();
+ iv_releasePost = true;
+}
+
+void DeferredWork::_waitAtPost()
+{
+ while(!iv_releasePost)
+ {
+ setThreadPriorityLow();
+ }
+ isync(); // Prevent spec. execution past this point until released.
+ setThreadPriorityHigh();
+}
+
+void DeferredWork::_cleanup()
+{
+ // Decrement reference count.
+ uint64_t cpu_count =
+ __sync_sub_and_fetch(&iv_cpus_and_next, 1ull << 32) >> 32;
+
+ // If the last object, delete this work item.
+ if (0 == cpu_count)
+ {
+ delete this;
+ }
+}
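deferred.C gives every work item the same fixed lifecycle: CPUs that were active when the item was created rendezvous at iv_barrier, the master runs masterPreWork() while the others spin on iv_releasePre, all active CPUs run activeMainWork() (CPUs whose start sequence number is newer than the item's run nonactiveMainWork() instead), the master runs masterPostWork() and dequeues the item, and the last CPU to drop its reference deletes it. A hedged usage sketch with a hypothetical work item in the mold of ExecuteShutdown and MemoryCoalesce (deferred.H, not shown in this diff, evidently supplies empty default hooks, since MemoryCoalesce overrides only one):

    #include <kernel/deferred.H>

    // Hypothetical work item: override only the hooks that are needed.
    class QuiesceAndAct : public DeferredWork
    {
      public:
        void masterPreWork()     { /* master only, before the main phase */ }
        void activeMainWork()    { /* every CPU active at creation time  */ }
        void nonactiveMainWork() { /* CPUs that came online afterwards   */ }
        void masterPostWork()    { /* master only; item dequeued after   */ }
    };

    // Producer: ownership of the item passes to the queue.
    DeferredQueue::insert(new QuiesceAndAct());

    // Consumers: each CPU drains the queue from its periodic path
    // (executePeriodics) or on SMP entry (smp_slave_main); the item
    // frees itself when the last participating CPU leaves _cleanup().
    DeferredQueue::execute();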
diff --git a/src/kernel/exception.C b/src/kernel/exception.C
index 0d63d0a73..071f10b1e 100644
--- a/src/kernel/exception.C
+++ b/src/kernel/exception.C
@@ -38,7 +38,7 @@ namespace ExceptionHandles
bool PrivInstr(task_t*);
}
-const uint64_t EXCEPTION_SRR1_MASK = 0x00000000783F0000;
+const uint64_t EXCEPTION_SRR1_MASK = 0x00000000783F0000;
const uint64_t EXCEPTION_SRR1_PRIVINS = 0x0000000000040000;
extern "C"
@@ -56,12 +56,12 @@ void kernel_execute_prog_ex()
}
if (!handled)
{
- printk("Program exception, killing task %d\n", t->tid);
- TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
+ printk("Program exception, killing task %d\n", t->tid);
+ TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
}
-const uint64_t EXCEPTION_DSISR_MASK = 0x0000000048000000;
+const uint64_t EXCEPTION_DSISR_MASK = 0x0000000048000000;
const uint64_t EXCEPTION_DSISR_PTEMISS = 0x0000000040000000;
const uint64_t EXCEPTION_DSISR_PERMERR = 0x0000000008000000;
const uint64_t EXCEPTION_DSISR_STORE = 0x0000000002000000;
@@ -75,7 +75,7 @@ void kernel_execute_data_storage()
bool handled = false;
switch(exception)
{
- case EXCEPTION_DSISR_PTEMISS:
+ case EXCEPTION_DSISR_PTEMISS:
{
uint64_t is_store = getDSISR() & EXCEPTION_DSISR_STORE;
handled = VmmManager::pteMiss(t, getDAR(), 0 != is_store);
@@ -87,16 +87,16 @@ void kernel_execute_data_storage()
uint64_t is_store = getDSISR() & EXCEPTION_DSISR_STORE;
if (is_store)
{
- handled = VmmManager::pteMiss(t, getDAR(), true);
+ handled = VmmManager::pteMiss(t, getDAR(), true);
}
- break;
+ break;
}
}
if (!handled)
{
- printk("Data Storage exception on %d: %lx, %lx @ %p\n",
- t->tid, getDAR(), getDSISR(), t->context.nip);
- TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
+ printk("Data Storage exception on %d: %lx, %lx @ %p\n",
+ t->tid, getDAR(), getDSISR(), t->context.nip);
+ TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
}
@@ -129,7 +129,7 @@ void kernel_execute_inst_storage()
{
printk("Inst Storage exception on %d: %lx, %lx\n",
t->tid, getSRR0(), getSRR1());
- TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
+ TaskManager::endTask(t, NULL, TASK_STATUS_CRASHED);
}
}
@@ -169,19 +169,19 @@ namespace ExceptionHandles
{
uint32_t* instruction = reinterpret_cast<uint32_t*>(phys_addr);
- // Check for 'doze' and skip over. This avoids a task-crash
+ // Check for 'nap' and skip over. This avoids a task-crash
// if for some reason we entered back into the task without
// privilege raised.
- if (*instruction == 0x4c000324)
+ if (*instruction == 0x4c000364)
{
- printk("Error: Doze executed with lowered permissions on %d\n",
+ printk("Error: Nap executed with lowered permissions on %d\n",
t->tid);
t->context.nip = static_cast<void*>(instruction + 1);
return true;
}
}
- return false;
+ return false;
}
}
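The magic numbers in this hunk are the fixed PowerPC power-management opcodes, so the handler really is checking for the new instruction rather than the old one. For reference (constant names are ours, not the patch's):

    // PowerPC 'power save' instruction encodings touched by this patch.
    const uint32_t PPC_INST_DOZE     = 0x4c000324; // old mechanism, removed
    const uint32_t PPC_INST_NAP      = 0x4c000364; // checked here and in CpuNap()
    const uint32_t PPC_INST_RVWINKLE = 0x4c0003e4; // issued by kernel_execute_winkle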
diff --git a/src/kernel/kernel.C b/src/kernel/kernel.C
index cbac9a20f..e29af240e 100644
--- a/src/kernel/kernel.C
+++ b/src/kernel/kernel.C
@@ -1,25 +1,26 @@
-// IBM_PROLOG_BEGIN_TAG
-// This is an automatically generated prolog.
-//
-// $Source: src/kernel/kernel.C $
-//
-// IBM CONFIDENTIAL
-//
-// COPYRIGHT International Business Machines Corp. 2010 - 2011
-//
-// p1
-//
-// Object Code Only (OCO) source materials
-// Licensed Internal Code Source Materials
-// IBM HostBoot Licensed Internal Code
-//
-// The source code for this program is not published or other-
-// wise divested of its trade secrets, irrespective of what has
-// been deposited with the U.S. Copyright Office.
-//
-// Origin: 30
-//
-// IBM_PROLOG_END
+/* IBM_PROLOG_BEGIN_TAG
+ * This is an automatically generated prolog.
+ *
+ * $Source: src/kernel/kernel.C $
+ *
+ * IBM CONFIDENTIAL
+ *
+ * COPYRIGHT International Business Machines Corp. 2010-2012
+ *
+ * p1
+ *
+ * Object Code Only (OCO) source materials
+ * Licensed Internal Code Source Materials
+ * IBM HostBoot Licensed Internal Code
+ *
+ * The source code for this program is not published or other-
+ * wise divested of its trade secrets, irrespective of what has
+ * been deposited with the U.S. Copyright Office.
+ *
+ * Origin: 30
+ *
+ * IBM_PROLOG_END_TAG
+ */
#include <stdint.h>
#include <kernel/console.H>
#include <kernel/pagemgr.H>
@@ -33,6 +34,7 @@
#include <kernel/vmmmgr.H>
#include <kernel/timemgr.H>
#include <sys/vfs.h>
+#include <kernel/deferred.H>
#include <stdlib.h>
@@ -85,6 +87,7 @@ int smp_slave_main(cpu_t* cpu)
CpuManager::init_slave_smp(cpu);
VmmManager::init_slb();
cpu->scheduler->setNextRunnable();
+ DeferredQueue::execute();
kernel_dispatch_task();
return 0;
}
diff --git a/src/kernel/makefile b/src/kernel/makefile
index 06c7536be..c23cb7fe9 100644
--- a/src/kernel/makefile
+++ b/src/kernel/makefile
@@ -1,11 +1,11 @@
-# IBM_PROLOG_BEGIN_TAG
+# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# $Source: src/kernel/makefile $
#
# IBM CONFIDENTIAL
#
-# COPYRIGHT International Business Machines Corp. 2010 - 2011
+# COPYRIGHT International Business Machines Corp. 2010-2012
#
# p1
#
@@ -19,14 +19,14 @@
#
# Origin: 30
#
-# IBM_PROLOG_END
+# IBM_PROLOG_END_TAG
ROOTPATH = ../..
OBJS = start.o kernel.o console.o pagemgr.o heapmgr.o taskmgr.o cpumgr.o
OBJS += syscall.o scheduler.o spinlock.o exception.o vmmmgr.o timemgr.o
OBJS += futexmgr.o ptmgr.o segmentmgr.o devicesegment.o basesegment.o
OBJS += block.o cpuid.o misc.o msghandler.o blockmsghdlr.o stacksegment.o
-OBJS += softpatch_p7.o barrier.o idebug.o intmsghandler.o
+OBJS += softpatch_p7.o barrier.o idebug.o intmsghandler.o deferred.o
include ${ROOTPATH}/config.mk
diff --git a/src/kernel/misc.C b/src/kernel/misc.C
index a29aaea21..d45473259 100644
--- a/src/kernel/misc.C
+++ b/src/kernel/misc.C
@@ -26,6 +26,8 @@
#include <kernel/cpuid.H>
#include <kernel/console.H>
#include <kernel/barrier.H>
+#include <kernel/scheduler.H>
+#include <assert.h>
extern "C" void kernel_shutdown(size_t, uint64_t, uint64_t) NO_RETURN;
@@ -82,15 +84,15 @@ namespace KernelMisc
if (c->master)
{
- printk("No payload... doze'ing all threads.\n");
+ printk("No payload... nap'ing all threads.\n");
}
- // Clear LPCR values that wakes up from doze. LPCR[49, 50, 51]
+ // Clear the LPCR bits that enable wake-up from nap. LPCR[49, 50, 51]
setLPCR(getLPCR() & (~0x0000000000007000));
while(1)
{
- doze();
+ nap();
}
}
else
@@ -110,4 +112,78 @@ namespace KernelMisc
g_payload_entry);
}
}
+
+ void WinkleCore::masterPreWork()
+ {
+ printk("Winkle threads - ");
+
+ // Save away the current timebase. All threads are in this object
+ // now so they're not going to be using the time for anything else.
+ iv_timebase = getTB();
+ }
+
+ extern "C" void kernel_execute_winkle(task_t* t);
+
+ void WinkleCore::activeMainWork()
+ {
+ cpu_t* cpu = CpuManager::getCurrentCPU();
+ printk("%d", static_cast<int>(cpu->cpu & 0x7));
+
+ // Return current task to run-queue so it isn't lost.
+ cpu->scheduler->returnRunnable();
+ TaskManager::setCurrentTask(cpu->idle_task);
+
+ // Clear the LPCR bits that enable wake-up from winkle. LPCR[49, 50, 51]
+ // Otherwise, there may be an interrupt pending or something that
+ // prevents us from fully entering winkle.
+ setLPCR(getLPCR() & (~0x0000000000007000));
+
+ // Deactivate CPU from kernel.
+ cpu->winkled = true;
+ CpuManager::deactivateCPU(cpu);
+
+ // Create kernel save area and store ptr in bottom of kernel stack.
+ task_t* saveArea = new task_t;
+ memset(saveArea, '\0', sizeof(task_t));
+ saveArea->context.msr_mask = 0xC030; // EE, PR, IR, DR.
+ *(reinterpret_cast<task_t**>(cpu->kernel_stack)) = saveArea;
+
+ // Execute winkle.
+ kernel_execute_winkle(saveArea);
+
+ // Re-activate CPU in kernel and re-init VMM SPRs.
+ delete saveArea;
+ cpu->winkled = false;
+ CpuManager::activateCPU(cpu);
+ VmmManager::init_slb();
+
+ // Select a new task if not the master CPU. Master CPU will resume
+ // the code that called cpu_master_winkle().
+ if (!cpu->master)
+ {
+ cpu->scheduler->setNextRunnable();
+ }
+
+ }
+
+ void WinkleCore::masterPostWork()
+ {
+ printk(" - Awake!\n");
+
+ // Restore timebase.
+ setTB(iv_timebase);
+
+ // Restore caller of cpu_master_winkle().
+ TaskManager::setCurrentTask(iv_caller);
+
+ }
+
+ void WinkleCore::nonactiveMainWork()
+ {
+ // Race condition that should not occur...
+ //
+ // Attempted to winkle the master and another thread came online in
+ // the process.
+ kassert(false);
+ }
};
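The save-area handshake in WinkleCore::activeMainWork() is the contract the new SRESET path in start.S depends on: the task_t pointer stored at the bottom of the kernel stack is what the vector reloads (ld r1, CPU_KERNEL_STACK(r2); ld r1, 0(r1); mtsprg3 r1) when a winkled core wakes. A condensed sketch of the C++ side, reusing the fields from the patch:

    // Sketch only: the winkle entry/exit contract implemented above.
    task_t* saveArea = new task_t;
    memset(saveArea, '\0', sizeof(task_t));
    saveArea->context.msr_mask = 0xC030;  // restore EE, PR, IR, DR on resume

    // The bottom of the kernel stack doubles as a mailbox so the SRESET
    // vector can recover the saved state with two dependent loads.
    *(reinterpret_cast<task_t**>(cpu->kernel_stack)) = saveArea;

    kernel_execute_winkle(saveArea);      // core sleeps; resumes here on wake
    delete saveArea;                      // state consumed; mailbox now stale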
diff --git a/src/kernel/start.S b/src/kernel/start.S
index 7d2d77845..f26592245 100644
--- a/src/kernel/start.S
+++ b/src/kernel/start.S
@@ -145,39 +145,8 @@ finished_relocate:
b kernel_dispatch_task; /* Return to task */
.org _start + 0x100
-intvect_system_reset:
- /* TODO: Add handling for SRESET from winkle for master. */
- ;// Need to identify reason for SRESET and then perform appropriate
- ;// action.
- ;// Current support:
- ;// - Decrementer wake-up from doze.
- ;// - External interrupt (also wake-up from doze).
- ;// - IPI wake-up from winkle of slave core.
-
- ;// Free up two registers temporarily.
- mtsprg0 r1
- mtsprg1 r2
-
- ;// Save CR.
- mfcr r1
-
- ;// Find bit 42:44 of SRR1 (reason for SRESET).
- mfsrr1 r2
- extrdi r2, r2, 3, 42
- ;// Check for decrementer (bits = 011).
- cmpi cr0, r2, 0x3
- beq+ intvect_system_reset_decrementer
- ;// Check for external interrupt (bits = 010).
- cmpi cr0, r2, 0x4
- beq+ intvect_system_reset_external
- ;// Check for HMI (bits = 101).
- cmpi cr0, r2, 0x5
- beq+ _start
- ;// Check for bits 000, which is probably a Simics bug right now.
- cmpi cr0, r2, 0x0
- beq+ _start
-1: ;// Unknown reason.
- b 1b
+intvect_system_reset_stub:
+ b intvect_system_reset
.org _start + 0x180
intvect_inst_start:
@@ -272,7 +241,7 @@ softpatch_stub:
mfsprg1 r1 ;// Restore R1 and use normal interrupt code.
STD_INTERRUPT_NOADDR(softpatch)
-.section .text
+.section .text.kernelasm
;// _main:
;// Set up stack and TOC and call kernel's main.
_main:
@@ -302,8 +271,7 @@ _other_thread_spinlock:
1:
ld r3, 0(r2)
;// Loop until value is 1...
- li r4, 1
- cmp cr0, r3, r4
+ cmpi cr0, r3, 1
beq _other_thread_spinlock_complete
or 1,1,1 ;// Lower thread priority.
b 1b
@@ -321,7 +289,7 @@ _other_thread_spinlock_complete:
ld r3, 0(r2) ;// Load CPU object.
cmpi cr0, r3, 0 ;// Check for NULL CPU object.
beq- cr0, 1f ;// Jump to handling if no CPU object found.
- ld r1, 0(r3) ;// Load initial stack.
+ ld r1, CPU_KERNEL_STACK(r3) ;// Load initial stack.
lis r2, smp_slave_main@h ;// Load TOC base.
ori r2, r2, smp_slave_main@l
@@ -329,11 +297,11 @@ _other_thread_spinlock_complete:
bl smp_slave_main ;// Call smp_slave_main
b _main_loop
1:
- ;// No CPU object available, doze this CPU.
+ ;// No CPU object available, nap this CPU.
;// We should only get to this point on simics. SBE will only wake up
;// a single core / thread at a time and we are responsible for
;// further sequencing.
- doze
+ nap
b 1b
@@ -399,8 +367,8 @@ kernel_save_task:
bne- cr0, 1f ;// Jump to FP-save if != NULL.
2:
- ld r1, 0(r1) ;// Get CPU pointer
- ld r1, 0(r1) ;// Get kernel stack pointer.
+ ld r1, TASK_CPUPTR(r1) ;// Get CPU pointer
+ ld r1, CPU_KERNEL_STACK(r1) ;// Get kernel stack pointer.
mfsprg0 r0 ;// Retrieve return address from SPRG0
mtlr r0 ;// Call
@@ -461,8 +429,8 @@ kernel_dispatch_task:
.global kernel_dispatch_task
mfsprg3 r1 ;// Load task structure to r1.
- ldarx r0, 0, r1 ;// Clear the reservation by loading / storing
- stdcx. r0, 0, r1 ;// the CPU pointer in the task.
+ ldarx r0, TASK_CPUPTR, r1 ;// Clear the reservation by loading / storing
+ stdcx. r0, TASK_CPUPTR, r1 ;// the CPU pointer in the task.
mfmsr r2 ;// Get current MSR
ori r2,r2, 0xC030 ;// Enable MSR[EE,PR,IR,DR].
@@ -574,20 +542,88 @@ kernel_dispatch_task:
b 2b
+intvect_system_reset:
+ ;// Need to identify reason for SRESET and then perform appropriate
+ ;// action.
+ ;// Current support:
+ ;// - Initial sreset.
+ ;// - Decrementer wake-up from nap.
+ ;// - External interrupt from nap or winkle.
+ ;// - IPI wake-up from winkle of slave core.
+
+ ;// Raise priority to high.
+ or 3,3,3
+
+ ;// Free up two registers temporarily.
+ mtsprg0 r1
+ mtsprg1 r2
+
+ ;// Check spinlock for 0, which implies we haven't started yet.
+ lis r2, kernel_other_thread_spinlock@h
+ ori r2, r2, kernel_other_thread_spinlock@l
+ ld r2, 0(r2)
+ cmpi cr0, r2, 0
+ beq- _start
+
+ ;// Get CPU object from thread ID, check for NULL which implies not
+ ;// started yet.
+ mfspr r1, PIR
+ lis r2, _ZN10CpuManager7cv_cpusE@h
+ ori r2, r2, _ZN10CpuManager7cv_cpusE@l
+ ld r2, 0(r2) ;// Dereference cv_cpus to get array.
+ muli r1, r1, 8
+ ldx r2, r1, r2 ;// Load CPU object.
+ cmpi cr0, r2, 0 ;// Check for NULL CPU object.
+ beq- _start
+
+ ;// Check for inactive CPU.
+ ld r1, CPU_STATUS(r2)
+ extrdi. r1, r1, 1, CPU_STATUS_ACTIVE
+ beq- intvect_system_reset_inactive
+
+ ;// We are an active processor, so this must be a nap wake-up.
+
+ ;// Find bit 42:44 of SRR1 (reason for SRESET).
+ mfsrr1 r2
+ extrdi r2, r2, 3, 42
+ ;// Check for decrementer (bits = 011).
+ cmpi cr0, r2, 0x3
+ beq+ intvect_system_reset_decrementer
+ ;// Check for external interrupt (bits = 010).
+ cmpi cr0, r2, 0x4
+ beq+ intvect_system_reset_external
+ ;// Check for HMI (bits = 101).
+ cmpi cr0, r2, 0x5
+ beq+ 1f ;// TODO: need to handle HMIs?
+1: ;// Unknown reason.
+ b 1b
+
+ ;// @fn intvect_system_reset_inactive
+ ;// Handle SRESET on an inactive processor.
+ ;// This is due to either initial start or winkle wake-up.
+intvect_system_reset_inactive:
+ ;// Check winkle state in CPU.
+ ld r1, CPU_STATUS(r2)
+ extrdi. r1, r1, 1, CPU_STATUS_WINKLED
+ beq+ _start
+
+ ;// We are a winkled processor that has just been awoken.
+ ld r1, CPU_KERNEL_STACK(r2)
+ ld r1, 0(r1)
+ mtsprg3 r1
+ b kernel_dispatch_task
+
;// @fn intvect_system_reset_decrementer
;// Handle SRESET due to decrementer wake-up.
- ;// This is a wake-up from 'doze'. Clear priviledge escalation and
+ ;// This is a wake-up from 'nap'. Clear privilege escalation and
;// perform decrementer.
intvect_system_reset_decrementer:
- ;// Restore CR.
- mtcr r1
-
- ;// Clear MSR mask, since privilaged instruction was now executed (doze).
+ ;// Clear MSR mask, since the privileged instruction (nap) has now executed.
mfsprg3 r1 ;// Load task structure to r1.
li r2, 0 ;// Zero r2.
std r2, TASK_MSR_MASK(r1) ;// Zero msr_mask.
- ;// Advance saved NIA (past doze).
+ ;// Advance saved NIA (past nap).
mfsrr0 r1
addi r1, r1, 4
mtsrr0 r1
@@ -600,13 +636,10 @@ intvect_system_reset_decrementer:
;// @fn intvect_system_reset_external
;// Handle SRESET due to wake-up from external interrupt.
- ;// This is a wake-up from 'doze', but not due to the decrementer
- ;// itself firing. Therefore, leave 'doze' process state alone
+ ;// This is a wake-up from 'nap', but not due to the decrementer
+ ;// itself firing. Therefore, leave 'nap' process state alone
;// including NIA and handle the external interrupt.
intvect_system_reset_external:
- ;// Restore CR.
- mtcr r1
-
;// Restore save registers.
mfsprg0 r1
mfsprg1 r2
@@ -638,9 +671,9 @@ system_call_fast_path:
bne cr0, 4f
;// Check for being on master processor.
mfsprg3 r6 ;// Get task structure.
- ld r6, 0(r6) ;// Get CPU structure.
- lbz r6, 12(r6) ;// Read master boolean.
- cmpi cr0, r6, 0x0
+ ld r6, TASK_CPUPTR(r6) ;// Get CPU structure.
+ ld r6, CPU_STATUS(r6) ;// Read CPU status word.
+ extrdi. r6, r6, 1, CPU_STATUS_MASTER
beq cr0, 300f ;// Call TASK_MIGRATE_TO_MASTER if not on master.
;// Read scratch.
mtspr 276, r4
@@ -665,9 +698,9 @@ system_call_fast_path:
bne cr0, 5f
;// Check for master processor.
mfsprg3 r6 ;// Get task structure.
- ld r6, 0(r6) ;// Get CPU structure.
- lbz r6, 12(r6) ;// Read master boolean.
- cmpi cr0, r6, 0x0
+ ld r6, TASK_CPUPTR(r6) ;// Get CPU structure.
+ ld r6, CPU_STATUS(r6) ;// Read CPU status word.
+ extrdi. r6, r6, 1, CPU_STATUS_MASTER
beq cr0, 300b ;// Call TASK_MIGRATE_TO_MASTER if not on master.
;// Write scratch.
mtspr 276, r4
@@ -785,6 +818,34 @@ kernel_shutdown_ea0_1_mode:
STD_INTERRUPT_NOADDR(hype_emu_assist)
+ ;// @fn kernel_execute_winkle
+ ;//
+ ;// Saves kernel state into a specified task structure and then executes
+ ;// the winkle instruction.
+ ;//
+ ;// @param r3 - task_t* to save kernel state into.
+ ;//
+.global kernel_execute_winkle
+kernel_execute_winkle:
+ ;// Move save area to SPRG3 for kernel_save_task.
+ mtsprg3 r3
+
+ ;// Copy LR to SRR0 (since that is where kernel_save_task gets it from).
+ mflr r3
+ mtsrr0 r3
+
+ ;// Load winkle instruction address into the "return to" address (SPRG0).
+ lis r3, 1f@h
+ ori r3, r3, 1f@l
+ mtsprg0 r3
+
+ ;// Save kernel state.
+ b kernel_save_task
+
+ ;// Execute winkle.
+1:
+ rvwinkle
+
.section .data
.balign 1024
kernel_stack:
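The rewritten intvect_system_reset dispatches on SRR1 bits 42:44, the architected wake-reason field. A C-style rendering of the assembly decision tree (a sketch for clarity; the handler names stand in for the intvect_system_reset_* labels):

    // extrdi r2, r2, 3, 42 == extract IBM-numbered bits 42:44, right-justified.
    uint64_t reason = (getSRR1() >> (63 - 44)) & 0x7;

    switch (reason)
    {
        case 0x3: /* 0b011 */ wake_from_decrementer(); break; // nap + DEC
        case 0x4: /* 0b100 */ wake_from_external();    break; // nap/winkle + EXT
        case 0x5: /* 0b101 */ /* HMI: left as a TODO */ break;
        default:  /* unknown: the asm spins (1: b 1b) */ break;
    }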
diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C
index 6dcbe0c25..645691d70 100644
--- a/src/kernel/syscall.C
+++ b/src/kernel/syscall.C
@@ -52,19 +52,7 @@ void kernel_execute_decrementer()
task_t* current_task = TaskManager::getCurrentTask();
- CpuManager::executePeriodics(c);//TODO is there still a potential deadlock?
-
- if (CpuManager::isShutdownRequested())
- {
- // The code below could cause a hang during shutdown
- // The stats can be retrieved from global variables as needed.
- // This can be uncommented for debug if desired
-#ifdef __MEMSTATS__
- if(c->master)
- HeapManager::stats();
-#endif
- KernelMisc::shutdown();
- }
+ CpuManager::executePeriodics(c);
if (current_task == TaskManager::getCurrentTask())
{
@@ -98,7 +86,8 @@ namespace Systemcalls
void CpuDDLevel(task_t *t);
void CpuStartCore(task_t *t);
void CpuSprValue(task_t *t);
- void CpuDoze(task_t *t);
+ void CpuNap(task_t *t);
+ void CpuWinkle(task_t *t);
void MmAllocBlock(task_t *t);
void MmRemovePages(task_t *t);
void MmSetPermission(task_t *t);
@@ -134,7 +123,8 @@ namespace Systemcalls
&CpuDDLevel, // MISC_CPUDDLEVEL
&CpuStartCore, // MISC_CPUSTARTCORE
&CpuSprValue, // MISC_CPUSPRVALUE
- &CpuDoze, // MISC_CPUDOZE
+ &CpuNap, // MISC_CPUNAP
+ &CpuWinkle, // MISC_CPUWINKLE
&MmAllocBlock, // MM_ALLOC_BLOCK
&MmRemovePages, // MM_REMOVE_PAGES
@@ -670,33 +660,52 @@ namespace Systemcalls
};
/**
- * Allow a task to request priviledge escalation to execute the 'doze'
+ * Allow a task to request privilege escalation to execute the 'nap'
* instruction.
*
- * Verifies the instruction to execute is, in fact, doze and then sets
+ * Verifies the instruction to execute is, in fact, nap and then sets
* an MSR mask in the task structure to allow escalation on next
* execution.
*
- * When 'doze' is executed the processor will eventually issue an
+ * When 'nap' is executed the processor will eventually issue an
* SRESET exception with flags in srr1 to indicate that the
* decrementer caused the wake-up. The kernel will then need to
- * advance the task to the instruction after the doze and remove
+ * advance the task to the instruction after the nap and remove
* privilege escalation.
*
*/
- void CpuDoze(task_t *t)
+ void CpuNap(task_t *t)
{
uint32_t* instruction = static_cast<uint32_t*>(t->context.nip);
- if (0x4c000324 == (*instruction)) // Verify 'doze' instruction,
+ if (0x4c000364 == (*instruction)) // Verify 'nap' instruction,
// otherwise just return.
{
- // Disable PR, IR, DR so 'doze' can be executed.
+ // Disable PR, IR, DR so 'nap' can be executed.
// (which means to stay in HV state)
t->context.msr_mask = 0x4030;
}
};
+ /** Winkle all the threads so that the runtime SLW image can be loaded. */
+ void CpuWinkle(task_t *t)
+ {
+ cpu_t* cpu = CpuManager::getCurrentCPU();
+
+ if ((CpuManager::getCpuCount() > CpuManager::getThreadCount()) ||
+ (!cpu->master))
+ {
+ TASK_SETRTN(t, -EDEADLK);
+ }
+ else
+ {
+ TASK_SETRTN(t, 0);
+ DeferredQueue::insert(new KernelMisc::WinkleCore(t));
+ TaskManager::setCurrentTask(cpu->idle_task);
+ DeferredQueue::execute();
+ }
+ }
+
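Net effect of the new syscall: CpuWinkle refuses with -EDEADLK unless only the master core's threads are online and the caller is the master thread; otherwise it queues a WinkleCore item that captures the calling task, parks the caller behind the idle task, and funnels this thread into the deferred queue with the rest. A caller's-eye sketch (cpu_master_winkle() is the presumed userspace wrapper named in misc.C, not defined in this diff):

    // Valid only on the master thread with no slave cores started.
    int rc = cpu_master_winkle();  // traps into Systemcalls::CpuWinkle()
    if (rc == -EDEADLK)
    {
        // A slave core was already running, or we are not on the master.
    }
    // On success, control returns only after WinkleCore::masterPostWork()
    // has restored the timebase and re-installed the calling task.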
/**
* Allocate a block of virtual memory within the base segment
* @param[in] t: The task used to allocate a block in the base segment