summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDoug Gilbert <dgilbert@us.ibm.com>2011-09-26 13:36:33 -0500
committerDouglas R. Gilbert <dgilbert@us.ibm.com>2011-10-25 11:16:20 -0500
commit5ab488739184f2b2649193e3f9da695ee334d04f (patch)
tree3d47e74b8dd290598527988adccff0ff57c72dc0 /src
parentd127ad9d985ffd7a42dba798bee66654242c4fe6 (diff)
downloadblackbird-hostboot-5ab488739184f2b2649193e3f9da695ee334d04f.tar.gz
blackbird-hostboot-5ab488739184f2b2649193e3f9da695ee334d04f.zip
new HEAP manager to reduce fragmentation
Change-Id: Ibe725a43e6366d9113ec99df1cc6aafa7bbb770e Reviewed-on: http://gfw160.austin.ibm.com:8080/gerrit/431 Tested-by: Jenkins Server Reviewed-by: A. Patrick Williams III <iawillia@us.ibm.com> Reviewed-by: Douglas R. Gilbert <dgilbert@us.ibm.com>
Diffstat (limited to 'src')
-rw-r--r--src/include/kernel/barrier.H76
-rw-r--r--src/include/kernel/cpu.H7
-rw-r--r--src/include/kernel/cpumgr.H97
-rw-r--r--src/include/kernel/heapmgr.H197
-rw-r--r--src/include/kernel/pagemgr.H35
-rw-r--r--src/include/util/align.H3
-rw-r--r--src/include/util/lockfree/stack.H94
-rw-r--r--src/kernel/barrier.C42
-rw-r--r--src/kernel/cpumgr.C42
-rw-r--r--src/kernel/heapmgr.C494
-rw-r--r--src/kernel/makefile2
-rw-r--r--src/kernel/pagemgr.C90
-rw-r--r--src/kernel/scheduler.C2
-rw-r--r--src/kernel/syscall.C10
-rw-r--r--src/lib/stdlib.C80
-rw-r--r--src/makefile2
-rw-r--r--src/usr/trace/trace.C23
17 files changed, 1073 insertions, 223 deletions
diff --git a/src/include/kernel/barrier.H b/src/include/kernel/barrier.H
new file mode 100644
index 000000000..b5587b4cd
--- /dev/null
+++ b/src/include/kernel/barrier.H
@@ -0,0 +1,76 @@
+// IBM_PROLOG_BEGIN_TAG
+// This is an automatically generated prolog.
+//
+// $Source: src/include/kernel/barrier.H $
+//
+// IBM CONFIDENTIAL
+//
+// COPYRIGHT International Business Machines Corp. 2011
+//
+// p1
+//
+// Object Code Only (OCO) source materials
+// Licensed Internal Code Source Materials
+// IBM HostBoot Licensed Internal Code
+//
+// The source code for this program is not published or other-
+// wise divested of its trade secrets, irrespective of what has
+// been deposited with the U.S. Copyright Office.
+//
+// Origin: 30
+//
+// IBM_PROLOG_END
+#ifndef __KERNEL_BARRIER_H
+#define __KERNEL_BARRIER_H
+
+#include <kernel/spinlock.H>
+#include <stdint.h>
+
+/**
+ * Barrier to be used in kernel space
+ */
+class Barrier
+{
+ public:
+
+ /**
+ * Default Constructor
+ */
+ Barrier() :
+ iv_count(0),
+ iv_event(0),
+ iv_missing(0) {};
+
+ /**
+ * Construct and init a barrier
+ * @param[in] i_count number of cpus to wait on
+ */
+ Barrier(size_t i_count) :
+ iv_count(i_count),
+ iv_event(0),
+ iv_missing(i_count) {}
+
+ /**
+ * Initialize the barrier
+ * @param[in] i_count number of cpus to wait on
+ */
+ void init(size_t i_count)
+ {
+ iv_count = i_count;
+ iv_missing = i_count;
+ }
+
+ /**
+ * Wait until all threads have arrived
+ */
+ void wait();
+
+ private:
+
+ Spinlock iv_spinlock; //!< Spinlock acts as mutex
+ volatile size_t iv_count; //!< Barrier thread count
+ volatile size_t iv_event; //!< Barrier event
+ volatile size_t iv_missing; //!< # of missing threads to wait for
+};
+
+#endif
diff --git a/src/include/kernel/cpu.H b/src/include/kernel/cpu.H
index ac09da288..f308fb4be 100644
--- a/src/include/kernel/cpu.H
+++ b/src/include/kernel/cpu.H
@@ -58,10 +58,13 @@ struct cpu_t
struct
{
/** If the CPU is the master */
- bool master;
+ bool master:1;
+
+ /** If the CPU is active */
+ bool active:1;
/** Ensure alignment of master attribute for asm code. */
- uint64_t __reserved_master:24;
+ uint64_t __reserved_master:30;
} PACKED;
/** Pointer to the scheduler for this CPU (may not be unique) */
diff --git a/src/include/kernel/cpumgr.H b/src/include/kernel/cpumgr.H
index b60c72bc6..78ade4ae3 100644
--- a/src/include/kernel/cpumgr.H
+++ b/src/include/kernel/cpumgr.H
@@ -25,73 +25,86 @@
#include <kernel/types.h>
#include <kernel/cpu.H>
+#include <kernel/barrier.H>
class CpuManager
{
public:
- enum
+ enum
{
MAXCPUS = KERNEL_MAX_SUPPORTED_CPUS,
CPU_PERIODIC_CHECK_MEMORY = 64,
- CPU_PERIODIC_FLUSH_PAGETABLE = 1024,
+ CPU_PERIODIC_FLUSH_PAGETABLE = 512, //TODO 1024 not currently hit
+ CPU_PERIODIC_DEFRAG = 949, // TODO Any bigger not currently hit
};
- /** @fn getCurrentCPU
- * Returns a pointer to the current CPU structure by using the
- * task structure in SPRG3.
- */
- static cpu_t* getCurrentCPU();
- static cpu_t* getCpu(size_t i) { return cv_cpus[i]; }
+ /** @fn getCurrentCPU
+ * Returns a pointer to the current CPU structure by using the
+ * task structure in SPRG3.
+ */
+ static cpu_t* getCurrentCPU();
+ static cpu_t* getCpu(size_t i) { return cv_cpus[i]; }
/** @brief Return pointer to master CPU object.
- */
+ */
static cpu_t* getMasterCPU();
- static void init();
- static void init_slave_smp(cpu_t*);
+ static void init();
+ static void init_slave_smp(cpu_t*);
- /** @fn requestShutdown
- * Requests that all CPUs shutdown
- */
- static void requestShutdown(uint64_t i_status);
+ /** @fn requestShutdown
+ * Requests that all CPUs shutdown
+ */
+ static void requestShutdown(uint64_t i_status);
+
+ /** @fn isShutdownRequested
+ * Returns if a shutdown of all CPUs was requested
+ */
+ static bool isShutdownRequested() { return cv_shutdown_requested; }
- /** @fn isShutdownRequested
- * Returns if a shutdown of all CPUs was requested
- */
- static bool isShutdownRequested() { return cv_shutdown_requested; }
+ /** @fn getShutdownStatus
+ * Returns the status code that needs to be posted during shutdown
+ */
+ static uint32_t getShutdownStatus() { return cv_shutdown_status; }
- /** @fn getShutdownStatus
- * Returns the status code that needs to be posted during shutdown
- */
- static uint32_t getShutdownStatus() { return cv_shutdown_status; }
+ /** @fn executePeriodics
+ * Perform periodic actions
+ * @param[in] cpu_t the CPU
+ */
+ static void executePeriodics(cpu_t * i_cpu);
- /** @fn executePeriodics
- * Perform periodic actions
- * @param[in] cpu_t the CPU
- */
- static void executePeriodics(cpu_t * i_cpu);
+ /** @fn activateCPU
+ * Activate a cpu to show it is running
+ * @param[in] cpu_t the CPU
+ * @post Active flag on in cpu_t structure
+ */
+ static void activateCPU(cpu_t * i_cpu);
protected:
- CpuManager();
- ~CpuManager() {}
+ CpuManager();
+ ~CpuManager() {}
- /** @fn startCPU
- * Starts the requested CPU. Default of -1 implies current CPU.
- */
- void startCPU(ssize_t i = -1);
- void startSlaveCPU(cpu_t*);
+ /** @fn startCPU
+ * Starts the requested CPU. Default of -1 implies current CPU.
+ */
+ void startCPU(ssize_t i = -1);
+ void startSlaveCPU(cpu_t*);
private:
- static cpu_t* cv_cpus[MAXCPUS]; // Need to be able to access this
- // from start.S to get initial stacks
- // of secondary cpus / threads.
+ static cpu_t* cv_cpus[MAXCPUS]; // Need to be able to access this
+ // from start.S to get initial stacks
+ // of secondary cpus / threads.
+
+ static Barrier cv_barrier; //!< barrier for cpus
+ static bool cv_defrag; //!< mem heap defrag
+ static size_t cv_cpuCount; //!< # of active CPUs
- // If a shutdown of all CPUs is requested
- static bool cv_shutdown_requested;
+ // If a shutdown of all CPUs is requested
+ static bool cv_shutdown_requested;
- // The status code that needs to be posted during shutdown
- static uint64_t cv_shutdown_status;
+ // The status code that needs to be posted during shutdown
+ static uint64_t cv_shutdown_status;
};
#endif
diff --git a/src/include/kernel/heapmgr.H b/src/include/kernel/heapmgr.H
index df52ee068..a2509e060 100644
--- a/src/include/kernel/heapmgr.H
+++ b/src/include/kernel/heapmgr.H
@@ -25,41 +25,194 @@
#include <stdint.h>
#include <util/lockfree/stack.H>
+#include <builtins.h>
+extern "C"
+void kernel_execute_decrementer();
+
+/**
+ * A class to manage the dynamic memory allocation heap.
+ * <p>There is a small allocation heap and a large heap.
+ * The Small allocation heap is a Fibonacci buddy system divided into the
+ * following bucket sizes: 16,32,48,80,128,208,336,544,880,1424,2304,3728.
+ * Eight bytes is used for overhead in each allocation making the effective
+ * bucket sizes 8,24,40,72,120,200,328,536,872,1416,2296, and 3720.
+ * The small heap increases one page at a time as allocations are needed.
+ * A 4k page is initially divided into buckets of size 3728,336, and 32.
+ * Pages can't be recovered once assigned to the small heap.</p>
+ *
+ * <p>Anything larger than 3720 goes into the large allocation heap.
+ * Memory in the large allocation heap is assigned as integral pages.
+ * When memory is released from the large allocation heap, it is returned
+ * to the system page manager.</p>
+ */
class HeapManager
{
public:
- static void init();
+ enum
+ {
+ BUCKETS = 12, //!< number of buckets
+ MIN_BUCKET_SIZE = 16, //!< Smallest bucket size
+ FIB_START_INCR = 16, //!< Seed for the Fibonacci series
+ BUCKET_SIZE0 = MIN_BUCKET_SIZE,
+ BUCKET_SIZE1 = BUCKET_SIZE0 + FIB_START_INCR,
+ BUCKET_SIZE2 = BUCKET_SIZE1 + BUCKET_SIZE0,
+ BUCKET_SIZE3 = BUCKET_SIZE2 + BUCKET_SIZE1,
+ BUCKET_SIZE4 = BUCKET_SIZE3 + BUCKET_SIZE2,
+ BUCKET_SIZE5 = BUCKET_SIZE4 + BUCKET_SIZE3,
+ BUCKET_SIZE6 = BUCKET_SIZE5 + BUCKET_SIZE4,
+ BUCKET_SIZE7 = BUCKET_SIZE6 + BUCKET_SIZE5,
+ BUCKET_SIZE8 = BUCKET_SIZE7 + BUCKET_SIZE6,
+ BUCKET_SIZE9 = BUCKET_SIZE8 + BUCKET_SIZE7,
+ BUCKET_SIZE10 = BUCKET_SIZE9 + BUCKET_SIZE8,
+ BUCKET_SIZE11 = BUCKET_SIZE10 + BUCKET_SIZE9,
+
+ MAX_SMALL_ALLOC_SIZE = BUCKET_SIZE11 - 8,// last bucket size-8
+ };
+
+ friend class CpuManager;
+ friend void kernel_execute_decrementer();
+
+ /**
+ * Initialize the HeapManager
+ */
+ static void init();
+
+ /**
+ * Request an allocation of heap memory
+ * @param[in] i_sz requested memory size in bytes
+ * @return a pointer to allocated memory
+ */
+ static void* allocate(size_t i_sz);
- static void* allocate(size_t n);
- static void free(void *);
+ /**
+ * Request a change in allocation size
+ * @param[in] i_ptr The memory allocation
+ * @param[in] i_sz The new size
+ * @return a pointer to allocated memory
+ * @post Memory may have been moved
+ */
+ static void* realloc(void * i_ptr, size_t i_sz);
- enum
- {
- BUCKETS = 8,
+ /**
+ * Return an allocation to the heap.
+ * @param[in] The allocation
+ */
+ static void free(void * i_ptr);
- MAX_ALLOC_SIZE = (1 << (BUCKETS + 3)) - 8,
- };
protected:
- HeapManager() {};
- ~HeapManager() {};
+
+ /**
+ * ctor
+ */
+ HeapManager() {};
+
+ /**
+ * dtor
+ */
+ ~HeapManager() {};
+
+ /**
+ * Coalesce unallocated memory chunks
+ * @pre This function can only be called from kernel space and
+ * requires that all other processes are quiesced
+ */
+ static void coalesce( void );
+
+ /**
+ * Print some stats to printkd
+ * @pre system quiesced, This function can only be called from
+ * kernel space.
+ * @post coalesce() called
+ */
+ static void stats( void );
private:
- void* _allocate(size_t);
- void _free(void*);
- struct chunk_t
- {
- size_t len;
- chunk_t* next;
- };
- Util::Lockfree::Stack<chunk_t> first_chunk[BUCKETS];
+ struct chunk_t
+ {
+ struct
+ {
+ bool free:1; //!< Is chunk free
+ uint64_t bucket:63; //!< Which bucket this chunk belongs to
+ } PACKED;
+
+ chunk_t* next; //!< Next chunk (for unallocated chunks only)
+ };
- chunk_t* pop_bucket(size_t);
- void push_bucket(chunk_t*, size_t);
+ /**
+ * big chunk directory entry
+ */
+ struct big_chunk_t
+ {
+ void * addr; //!< Allocated address
+ size_t page_count; //!< Number of pages used
+ big_chunk_t * next; //!< Next allocation
- void newPage();
-};
+ big_chunk_t(void * i_ptr, size_t i_pages)
+ : addr(i_ptr), page_count(i_pages), next(NULL) {}
+ };
+ void* _allocate(size_t); //!< see allocate
+ void* _allocateBig(size_t); //!< see allocate
+ void* _realloc(void*,size_t); //!< see realloc
+ void* _reallocBig(void*,size_t);//!< see realloc
+ void _free(void*); //!< see free
+ bool _freeBig(void*); //!< see free
+ void _coalesce(void); //!< see coalesce
+
+ /**
+ * Get a chunk of free memory from the given bucket
+ * @param[in] The bucket index
+ * @return a chunk
+ */
+ chunk_t* pop_bucket(size_t);
+
+ /**
+ * Push a free chunk onto the free bucket stack
+ * @param[in] the chunk
+ * @param[in] the bucket index
+ */
+ void push_bucket(chunk_t*, size_t);
+
+ /**
+ * Get a new page from the page manager, set it up, and add it to the
+ * free pool
+ */
+ void newPage();
+
+ /**
+ * Get the bytesize of the given bucket
+ * @param[in] The bucket index
+ * @return the bytesize of the bucket
+ */
+ ALWAYS_INLINE
+ size_t bucketByteSize(size_t i_bucketIndex)
+ {
+ return cv_chunk_size[i_bucketIndex];
+ }
+
+ /**
+ * Get the bucket index for a given size
+ * @param[in] The bytesize
+ * @return the smallest bucket index that the given size will fit into
+ */
+ size_t bucketIndex(size_t i_sz);
+
+ /**
+ * Walk through all the allocated pages and verify coherency
+ */
+ void test_pages();
+
+ private: // data
+
+ Util::Lockfree::Stack<chunk_t> first_chunk[BUCKETS]; //!< free pool
+ Util::Lockfree::Stack<big_chunk_t> big_chunk_stack; //!< big chunk dir
+
+ static const size_t cv_chunk_size[BUCKETS];//!< The bucket sizes
+ static size_t cv_coalesce_count; //!< coalesced chunk count
+ static size_t cv_free_bytes; //!< Only valid after coalesce()
+ static size_t cv_free_chunks; //!< Only valid after coalesce()
+};
#endif
diff --git a/src/include/kernel/pagemgr.H b/src/include/kernel/pagemgr.H
index 6cb6fefc4..3ed2c1b34 100644
--- a/src/include/kernel/pagemgr.H
+++ b/src/include/kernel/pagemgr.H
@@ -29,6 +29,8 @@
#include <kernel/vmmmgr.H>
#include <builtins.h>
#include <kernel/console.H>
+#include <util/align.H>
+#include <sys/vfs.h>
/** @class PageManager
* @brief Manages the allocation of memory pages.
@@ -37,10 +39,10 @@
class PageManager
{
public:
- static void init();
+ static void init();
- static void* allocatePage(size_t n = 1);
- static void freePage(void*, size_t n = 1);
+ static void* allocatePage(size_t n = 1);
+ static void freePage(void*, size_t n = 1);
/**
* Query state for available memory
@@ -48,9 +50,16 @@ class PageManager
*/
static uint64_t queryAvail();
- enum
- {
- MEMLEN = VmmManager::HTABORG,
+ /**
+ * Coalesce adjacent free memory blocks
+ * @pre This function can only be called from kernel space and
+ * requires that all other processes are quiesced
+ */
+ static void coalesce( void );
+
+ enum
+ {
+ MEMLEN = VmmManager::HTABORG,
BUCKETS = 16,
};
@@ -62,6 +71,7 @@ class PageManager
private:
void* _allocatePage(size_t);
void _freePage(void*, size_t);
+ void _coalesce( void ); //!< see coalesce()
/** see queryAvail() */
ALWAYS_INLINE
@@ -70,6 +80,13 @@ class PageManager
return (100*iv_pagesAvail)/iv_pagesTotal;
}
+ ALWAYS_INLINE
+ uint64_t firstPageAddr( void )
+ {
+ return ALIGN_PAGE(VFS_LAST_ADDRESS);
+ }
+
+
/** Statistics on number of free pages (for debug) */
uint64_t iv_pagesAvail;
/** Total number of pages */
@@ -77,12 +94,16 @@ class PageManager
struct page_t
{
- page_t* next;
+ page_t* next; //!< Next block of pages
+ page_t* prev; //!< Prev block of pages
+ page_t* key; //!< Key for pqueue
};
Util::Lockfree::Stack<page_t> first_page[BUCKETS];
page_t* pop_bucket(size_t);
void push_bucket(page_t*, size_t);
+
+ static size_t cv_coalesce_count; //!< running coalesced counter
};
#endif
diff --git a/src/include/util/align.H b/src/include/util/align.H
index 7ff67ba38..55073256d 100644
--- a/src/include/util/align.H
+++ b/src/include/util/align.H
@@ -28,6 +28,9 @@
// Return a number >= input that is aligned on a 4-byte boundary
#define ALIGN_4(u) ((u + 0x3ull) & ~0x3ull)
+// Return a number >= input that is aligned on an 8-byte boundary
+#define ALIGN_8(u) ((u + 0x7ull) & ~0x7ull)
+
// Return a number >= input that is aligned on a page boundary
#define ALIGN_PAGE(u) ((u + (PAGESIZE-1)) & ~(PAGESIZE-1))
diff --git a/src/include/util/lockfree/stack.H b/src/include/util/lockfree/stack.H
index 8e549df12..61120364b 100644
--- a/src/include/util/lockfree/stack.H
+++ b/src/include/util/lockfree/stack.H
@@ -29,50 +29,64 @@ namespace Util
{
namespace Lockfree
{
- template <typename _T>
- class Stack
- {
- public:
- Stack() : head(NULL) {};
-
- _T* pop();
- void push(_T*);
+ template <typename _T>
+ class Stack
+ {
+ public:
+ Stack() : head(NULL) {};
- private:
- _T* head;
- };
+ _T* pop();
+ void push(_T*);
- template <typename _T>
- _T* Stack<_T>::pop()
- {
- _T * _head = head;
- _T * h = (_T*) (((uint64_t)_head) & 0x00000000FFFFFFFF);
- if (NULL == h) return h;
- uint64_t token = ((uint64_t)_head) >> 32;
- token++;
- _T * h_next = (_T*) (((uint64_t)(h->next)) | (token << 32));
- if (!__sync_bool_compare_and_swap(&head,
- _head,
- h_next))
- return pop();
- return h;
- }
+ /**
+ * Get a pointer to the first element in the stack
+ * @return the pointer to the first element
+ * @Note: SMP safety of this pointer is not guaranteed.
+ */
+ _T* first();
- template <typename _T>
- void Stack<_T>::push(_T* p)
- {
- _T* _head = head;
- p->next = (_T*) (((uint64_t)_head) & 0x00000000FFFFFFFF);
- uint64_t token = ((uint64_t)_head) >> 32;
- token++;
+ private:
+ _T* head;
+ };
- _T* _p = (_T*) (((uint64_t)p) | (token << 32));
-
- if (!__sync_bool_compare_and_swap(&head,
- _head,
- _p))
- push(p);
- }
+ template <typename _T>
+ _T* Stack<_T>::first()
+ {
+ _T * _head = head;
+ return (_T*) (((uint64_t)_head) & 0x00000000FFFFFFFF);
+ }
+
+ template <typename _T>
+ _T* Stack<_T>::pop()
+ {
+ _T * _head = head;
+ _T * h = (_T*) (((uint64_t)_head) & 0x00000000FFFFFFFF);
+ if (NULL == h) return h;
+ uint64_t token = ((uint64_t)_head) >> 32;
+ token++;
+ _T * h_next = (_T*) (((uint64_t)(h->next)) | (token << 32));
+ if (!__sync_bool_compare_and_swap(&head,
+ _head,
+ h_next))
+ return pop();
+ return h;
+ }
+
+ template <typename _T>
+ void Stack<_T>::push(_T* p)
+ {
+ _T* _head = head;
+ p->next = (_T*) (((uint64_t)_head) & 0x00000000FFFFFFFF);
+ uint64_t token = ((uint64_t)_head) >> 32;
+ token++;
+
+ _T* _p = (_T*) (((uint64_t)p) | (token << 32));
+
+ if (!__sync_bool_compare_and_swap(&head,
+ _head,
+ _p))
+ push(p);
+ }
}
}
diff --git a/src/kernel/barrier.C b/src/kernel/barrier.C
new file mode 100644
index 000000000..a5dadc63a
--- /dev/null
+++ b/src/kernel/barrier.C
@@ -0,0 +1,42 @@
+// IBM_PROLOG_BEGIN_TAG
+// This is an automatically generated prolog.
+//
+// $Source: src/kernel/barrier.C $
+//
+// IBM CONFIDENTIAL
+//
+// COPYRIGHT International Business Machines Corp. 2011
+//
+// p1
+//
+// Object Code Only (OCO) source materials
+// Licensed Internal Code Source Materials
+// IBM HostBoot Licensed Internal Code
+//
+// The source code for this program is not published or other-
+// wise divested of its trade secrets, irrespective of what has
+// been deposited with the U.S. Copyright Office.
+//
+// Origin: 30
+//
+// IBM_PROLOG_END
+#include <kernel/barrier.H>
+#include <arch/ppc.H>
+
+void Barrier::wait()
+{
+ iv_spinlock.lock();
+ --iv_missing;
+ if(iv_missing > 0)
+ {
+ size_t l_event = iv_event;
+ iv_spinlock.unlock();
+ while(iv_event == l_event);
+ }
+ else
+ {
+ iv_missing = iv_count;
+ ++iv_event;
+ iv_spinlock.unlock();
+ }
+}
diff --git a/src/kernel/cpumgr.C b/src/kernel/cpumgr.C
index 86e8653bf..6ec3b9f6d 100644
--- a/src/kernel/cpumgr.C
+++ b/src/kernel/cpumgr.C
@@ -34,10 +34,14 @@
#include <sys/sync.h>
#include <kernel/cpuid.H>
#include <kernel/ptmgr.H>
+#include <kernel/heapmgr.H>
cpu_t* CpuManager::cv_cpus[CpuManager::MAXCPUS] = { NULL };
bool CpuManager::cv_shutdown_requested = false;
uint64_t CpuManager::cv_shutdown_status = 0;
+Barrier CpuManager::cv_barrier;
+bool CpuManager::cv_defrag = false;
+size_t CpuManager::cv_cpuCount = 0;
CpuManager::CpuManager()
{
@@ -150,6 +154,7 @@ void CpuManager::startCPU(ssize_t i)
if (currentCPU)
{
setDEC(TimeManager::getTimeSliceCount());
+ activateCPU(cv_cpus[i]);
}
return;
}
@@ -157,10 +162,18 @@ void CpuManager::startCPU(ssize_t i)
void CpuManager::startSlaveCPU(cpu_t* cpu)
{
setDEC(TimeManager::getTimeSliceCount());
+ activateCPU(cpu);
return;
}
+void CpuManager::activateCPU(cpu_t * i_cpu)
+{
+ i_cpu->active = true;
+ ++cv_cpuCount;
+ lwsync();
+}
+
void CpuManager::executePeriodics(cpu_t * i_cpu)
{
if(i_cpu->master)
@@ -169,11 +182,11 @@ void CpuManager::executePeriodics(cpu_t * i_cpu)
if(0 == (i_cpu->periodic_count % CPU_PERIODIC_CHECK_MEMORY))
{
uint64_t pcntAvail = PageManager::queryAvail();
- if(pcntAvail < 16) // Less than 16% pages left TODO 16 ok?
+ if(pcntAvail < 16)
{
VmmManager::flushPageTable();
++(i_cpu->periodic_count); // prevent another flush below
- if(pcntAvail < 5) // TODO 5% ok
+ if(pcntAvail < 5)
{
VmmManager::castOutPages(VmmManager::CRITICAL);
}
@@ -187,6 +200,31 @@ void CpuManager::executePeriodics(cpu_t * i_cpu)
{
VmmManager::flushPageTable();
}
+ if(0 == (i_cpu->periodic_count % CPU_PERIODIC_DEFRAG))
+ {
+ // set up barrier based on # cpus cv_barrier;
+ // TODO whatif other cpus become active?
+ isync();
+ cv_barrier.init(cv_cpuCount);
+ if(!cv_shutdown_requested)
+ {
+ cv_defrag = true;
+ lwsync();
+ }
+ }
+ }
+ if(cv_defrag)
+ {
+ cv_barrier.wait();
+
+ if(i_cpu->master)
+ {
+ HeapManager::coalesce();
+ PageManager::coalesce();
+ cv_defrag = false;
+ }
+
+ cv_barrier.wait();
}
}
diff --git a/src/kernel/heapmgr.C b/src/kernel/heapmgr.C
index 7d3757b9f..e3d2ad1ea 100644
--- a/src/kernel/heapmgr.C
+++ b/src/kernel/heapmgr.C
@@ -25,87 +25,511 @@
#include <util/singleton.H>
#include <kernel/console.H>
#include <kernel/pagemgr.H>
+#include <util/align.H>
+#include <arch/ppc.H>
+
+#ifdef HOSTBOOT_DEBUG
+#define SMALL_HEAP_PAGES_TRACKED 64
+
+// track pages allocated to smallheap
+void * g_smallHeapPages[SMALL_HEAP_PAGES_TRACKED];
+
+// If these stats are to be kept then they should be modified using
+// atomic instructions
+uint16_t g_bucket_counts[HeapManager::BUCKETS];
+uint32_t g_smallheap_allocated = 0; // sum of currently allocated
+uint32_t g_smallheap_alloc_hw = 0; // allocated high water
+uint32_t g_smallheap_pages = 0; // total bytes high water (pages used)
+uint32_t g_smallheap_count = 0; // # of chunks allocated
+
+uint32_t g_big_chunks = 0;
+uint32_t g_bigheap_highwater = 0;
+#endif
+
+const size_t HeapManager::cv_chunk_size[BUCKETS] =
+{
+ HeapManager::BUCKET_SIZE0,
+ HeapManager::BUCKET_SIZE1,
+ HeapManager::BUCKET_SIZE2,
+ HeapManager::BUCKET_SIZE3,
+ HeapManager::BUCKET_SIZE4,
+ HeapManager::BUCKET_SIZE5,
+ HeapManager::BUCKET_SIZE6,
+ HeapManager::BUCKET_SIZE7,
+ HeapManager::BUCKET_SIZE8,
+ HeapManager::BUCKET_SIZE9,
+ HeapManager::BUCKET_SIZE10,
+ HeapManager::BUCKET_SIZE11
+};
+
+size_t HeapManager::cv_coalesce_count = 0;
+size_t HeapManager::cv_free_bytes;
+size_t HeapManager::cv_free_chunks;
+
void HeapManager::init()
{
Singleton<HeapManager>::instance();
}
-void* HeapManager::allocate(size_t n)
+void * HeapManager::allocate(size_t i_sz)
{
HeapManager& hmgr = Singleton<HeapManager>::instance();
- return hmgr._allocate(n);
+ if(i_sz > MAX_SMALL_ALLOC_SIZE)
+ {
+ return hmgr._allocateBig(i_sz);
+ }
+ return hmgr._allocate(i_sz);
}
-void HeapManager::free(void* p)
+void HeapManager::free(void * i_ptr)
{
HeapManager& hmgr = Singleton<HeapManager>::instance();
- return hmgr._free(p);
+ return hmgr._free(i_ptr);
}
-void* HeapManager::_allocate(size_t n)
+void* HeapManager::realloc(void* i_ptr, size_t i_sz)
{
- size_t which_bucket = 0;
- while (n > (size_t)((1 << (which_bucket + 4)) - 8)) which_bucket++;
+ return Singleton<HeapManager>::instance()._realloc(i_ptr,i_sz);
+}
- chunk_t* chunk = (chunk_t*)NULL;
+void HeapManager::coalesce( void )
+{
+ Singleton<HeapManager>::instance()._coalesce();
+}
+
+void* HeapManager::_allocate(size_t i_sz)
+{
+ size_t which_bucket = bucketIndex(i_sz+8);
+
+ chunk_t* chunk = reinterpret_cast<chunk_t*>(NULL);
chunk = pop_bucket(which_bucket);
if (NULL == chunk)
{
newPage();
- return _allocate(n);
+ return _allocate(i_sz);
}
else
{
- return &chunk->next;
+#ifdef HOSTBOOT_DEBUG
+ size_t alloc = bucketByteSize(chunk->bucket);
+ __sync_add_and_fetch(&g_smallheap_count,1);
+ __sync_add_and_fetch(&g_smallheap_allocated,alloc);
+ if (g_smallheap_allocated > g_smallheap_alloc_hw)
+ g_smallheap_alloc_hw = g_smallheap_allocated;
+ // test_pages();
+#endif
+ chunk->free = false;
+ return &chunk->next;
+ }
+}
+
+
+void* HeapManager::_realloc(void* i_ptr, size_t i_sz)
+{
+ void* new_ptr = _reallocBig(i_ptr,i_sz);
+ if(new_ptr) return new_ptr;
+
+ new_ptr = i_ptr;
+ chunk_t* chunk = reinterpret_cast<chunk_t*>(((size_t*)i_ptr)-1);
+ size_t asize = bucketByteSize(chunk->bucket)-8;
+ if(asize < i_sz)
+ {
+ new_ptr = _allocate(i_sz);
+ memcpy(new_ptr, i_ptr, asize);
+ _free(i_ptr);
}
+ return new_ptr;
}
-void HeapManager::_free(void* p)
+void* HeapManager::_reallocBig(void* i_ptr, size_t i_sz)
{
- if (NULL == p) return;
+ // Currently all large allocations fall on a page boundary,
+ // but small allocations never do
+ if(ALIGN_PAGE(reinterpret_cast<uint64_t>(i_ptr)) !=
+ reinterpret_cast<uint64_t>(i_ptr))
+ {
+ return NULL;
+ }
+
+ void* new_ptr = NULL;
+ big_chunk_t * bc = big_chunk_stack.first();
+ while(bc)
+ {
+ if(bc->addr == i_ptr)
+ {
+ size_t new_size = ALIGN_PAGE(i_sz)/PAGESIZE;
+ if(new_size > bc->page_count)
+ {
+#ifdef HOSTBOOT_DEBUG
+ __sync_add_and_fetch(&g_big_chunks,new_size-bc->page_count);
+ if(g_bigheap_highwater < g_big_chunks)
+ g_bigheap_highwater = g_big_chunks;
+#endif
+ new_ptr = PageManager::allocatePage(new_size);
- chunk_t* chunk = (chunk_t*)(((size_t*)p)-1);
- push_bucket(chunk, chunk->len);
+ memcpy(new_ptr,i_ptr,bc->page_count*PAGESIZE);
+
+ size_t page_count = bc->page_count;
+ bc->addr = new_ptr;
+ bc->page_count = new_size;
+ lwsync();
+
+ PageManager::freePage(i_ptr,page_count);
+ }
+ new_ptr = bc->addr;
+
+ break;
+ }
+ bc = bc->next;
+ }
+ return new_ptr;
+}
+
+void HeapManager::_free(void * i_ptr)
+{
+ if (NULL == i_ptr) return;
+
+ if(!_freeBig(i_ptr))
+ {
+ chunk_t* chunk = reinterpret_cast<chunk_t*>(((size_t*)i_ptr)-1);
+#ifdef HOSTBOOT_DEBUG
+ __sync_sub_and_fetch(&g_smallheap_count,1);
+ __sync_sub_and_fetch(&g_smallheap_allocated,bucketByteSize(chunk->bucket));
+#endif
+ push_bucket(chunk, chunk->bucket);
+ }
}
-HeapManager::chunk_t* HeapManager::pop_bucket(size_t n)
+
+HeapManager::chunk_t* HeapManager::pop_bucket(size_t i_bucket)
{
- if (n >= BUCKETS) return NULL;
+ if (i_bucket >= BUCKETS) return NULL;
- chunk_t* c = first_chunk[n].pop();
+ chunk_t* c = first_chunk[i_bucket].pop();
if (NULL == c)
{
- // Couldn't allocate from the correct size bucket, so split up an
- // item from the next sized bucket.
- c = pop_bucket(n+1);
- if (NULL != c)
- {
- chunk_t* c1 = (chunk_t*)(((uint8_t*)c) + (1 << (n + 4)));
- c->len = n;
- c1->len = n;
- push_bucket(c1, n);
- }
+ // Couldn't allocate from the correct size bucket, so split up an
+ // item from the next sized bucket.
+ c = pop_bucket(i_bucket+1);
+ if (NULL != c)
+ {
+ size_t c_size = bucketByteSize(i_bucket);
+ size_t c1_size = bucketByteSize(c->bucket) - c_size;
+ size_t c1_bucket = bucketIndex(c1_size);
+
+ chunk_t* c1 = reinterpret_cast<chunk_t*>(((uint8_t*)c) + c_size);
+ c1->bucket = c1_bucket;
+ c->bucket = i_bucket;
+
+ // c1_size should always be a valid size unless the FIB sequence is modified
+ // then we could end up with an 8 byte piece of junk.
+ if(c1_size >= MIN_BUCKET_SIZE)
+ {
+ push_bucket(c1, c1_bucket);
+ }
+ }
}
return c;
}
-void HeapManager::push_bucket(chunk_t* c, size_t n)
+
+void HeapManager::push_bucket(chunk_t* i_chunk, size_t i_bucket)
{
- if (n >= BUCKETS) return;
- first_chunk[n].push(c);
+ if (i_bucket >= BUCKETS) return;
+ i_chunk->free = true;
+ first_chunk[i_bucket].push(i_chunk);
}
+
void HeapManager::newPage()
{
void* page = PageManager::allocatePage();
- chunk_t * c = (chunk_t*)page;
- for (uint64_t i = 0; i < (PAGESIZE / (1 << (BUCKETS + 3))); i++)
+ chunk_t * c = reinterpret_cast<chunk_t*>(page);
+ size_t remaining = PAGESIZE;
+
+#ifdef HOSTBOOT_DEBUG
+ uint32_t idx = __sync_fetch_and_add(&g_smallheap_pages,1);
+ if(idx < SMALL_HEAP_PAGES_TRACKED)
+ g_smallHeapPages[idx] = page;
+#endif
+
+ while(remaining >= MIN_BUCKET_SIZE)
+ {
+ size_t bucket = bucketIndex(remaining);
+
+ // bucket might be one too big
+ if(bucket == BUCKETS || bucketByteSize(bucket) > remaining)
+ {
+ --bucket;
+ }
+ c->bucket = bucket;
+ push_bucket(c, bucket);
+
+ size_t bsize = bucketByteSize(bucket);
+ c = reinterpret_cast<chunk_t*>(((uint8_t*)c) + bsize);
+ remaining -= bsize;
+ }
+ // Note: if the fibonacci series originally used is modified, there could
+ // be a remainder. Throw it away.
+}
+
+// find smallest bucket i_sz will fit into
+size_t HeapManager::bucketIndex(size_t i_sz)
+{
+
+ // A simple linear search loop is unrolled by the compiler
+ // and generates large asm code.
+ //
+ // A manual unroll of a binary search using "if" statements is 160 bytes
+ // for this function and 160 bytes for the bucketByteSize() function
+ // but does not need the 96 byte cv_chunk_size array. Total 320 bytes
+ //
+ // This function is 120 bytes and it scales if more buckets are added
+ // bucketByteSize() using the static array uses 96 bytes. Total = 216 bytes
+
+ if(i_sz > cv_chunk_size[BUCKETS-1]) return BUCKETS;
+
+ // binary search
+ int64_t high_idx = BUCKETS - 1;
+ int64_t low_idx = 0;
+ size_t bucket = 0;
+ while(low_idx <= high_idx)
{
- c->len = BUCKETS-1;
- push_bucket(c, BUCKETS-1);
- c = (chunk_t*)(((uint8_t*)c) + (1 << (BUCKETS + 3)));
+ bucket = (low_idx + high_idx) / 2;
+ if( i_sz > bucketByteSize(bucket))
+ {
+ low_idx = bucket + 1;
+ }
+ else
+ {
+ high_idx = bucket - 1;
+ if(i_sz > bucketByteSize(high_idx)) // high_idx would be too small
+ break;
+ }
}
+ return bucket;
}
+
+
+// all other processes must be quiesced
+void HeapManager::_coalesce()
+{
+ // Clean up the smaller heap
+ bool merged_one = false;
+ chunk_t head; head.next = NULL;
+ chunk_t * c = NULL;
+
+ // make a chain out of all the free chunks
+ // OPTION: A priority queue sorted by address could be used.
+ // This would improve performance when searching for chunks to coalesce, but
+ // would require a minimum bucket size of 32 bytes.
+ // TODO: sort by address
+ for(size_t bucket = 0; bucket < BUCKETS; ++bucket)
+ {
+ while(NULL != (c = first_chunk[bucket].pop()))
+ {
+ c->next = head.next;
+ head.next = c;
+ }
+ }
+
+ do
+ {
+ merged_one = false;
+ // look for chunks to coalesce
+ for(c = head.next; c!=NULL; c = c->next)
+ {
+ size_t s = bucketByteSize(c->bucket);
+ chunk_t * c_find = reinterpret_cast<chunk_t*>(((uint8_t*)c) + s);
+
+ // c_find must be in same page
+ if(reinterpret_cast<uint64_t>(c_find) <
+ ALIGN_PAGE(reinterpret_cast<uint64_t>(c)))
+ {
+ if(c_find->free)
+ {
+ // is combinable?
+ size_t ns = s + bucketByteSize(c_find->bucket);
+ size_t n_bucket = bucketIndex(ns);
+ if(n_bucket < BUCKETS && bucketByteSize(n_bucket) == ns)
+ {
+ // find c_find in the free chain and remove it
+ chunk_t * c_prev = &head;
+ chunk_t * c_seek = head.next;
+ while(c_seek != NULL)
+ {
+ if(c_find == c_seek) // found it -> merge
+ {
+ // new bigger chunk
+ c->bucket = n_bucket;
+ merged_one = true;
+ ++cv_coalesce_count;
+
+ // remove found element from the chain
+ c_prev->next = c_find->next;
+
+ break;
+ }
+ c_prev = c_seek;
+ c_seek = c_seek->next;
+ }
+ }
+ }
+ }
+ }
+ } while(merged_one); // repeat until nothing can be merged
+
+ // restore the free buckets
+ cv_free_chunks = 0;
+ cv_free_bytes = 0;
+ c = head.next;
+ while(c != NULL)
+ {
+ chunk_t * c_next = c->next;
+ push_bucket(c,c->bucket);
+ ++cv_free_chunks;
+ cv_free_bytes += bucketByteSize(c->bucket) - 8;
+ c = c_next;
+ }
+ printkd("HeapMgr coalesced total %ld\n",cv_coalesce_count);
+ test_pages();
+}
+
+void HeapManager::stats()
+{
+ coalesce(); // collects some of the stats
+
+ printkd("Memory Heap Stats:\n");
+ printkd(" %d Large heap pages allocated.\n",g_big_chunks);
+ printkd(" %d Large heap max allocated.\n",g_bigheap_highwater);
+ printkd(" %d Small heap pages.\n",g_smallheap_pages);
+ printkd(" %d Small heap bytes max allocated\n",g_smallheap_alloc_hw);
+ printkd(" %d Small heap bytes allocated in %d chunks\n",
+ g_smallheap_allocated,g_smallheap_count);
+ printkd(" %ld Small heap free bytes in %ld chunks\n",cv_free_bytes,cv_free_chunks);
+ printkd(" %ld Small heap total chunks coalesced\n",cv_coalesce_count);
+ printkd("Small heap bucket profile:\n");
+ for(size_t i = 0; i < BUCKETS; ++i)
+ {
+ printkd(" %d chunks of bytesize %ld\n",
+ g_bucket_counts[i],
+ cv_chunk_size[i]-8);
+ }
+
+ PageManager::coalesce();
+}
+
+
+void HeapManager::test_pages()
+{
+#ifdef HOSTBOOT_DEBUG
+ for(size_t i = 0; i < BUCKETS; ++i)
+ g_bucket_counts[i] = 0;
+
+ size_t max_idx = g_smallheap_pages;
+ if(max_idx > SMALL_HEAP_PAGES_TRACKED) max_idx = SMALL_HEAP_PAGES_TRACKED;
+ for(size_t i = 0; i < max_idx; ++i)
+ {
+ chunk_t* c = reinterpret_cast<chunk_t*>(g_smallHeapPages[i]);
+ uint8_t* c_prev = reinterpret_cast<uint8_t*>(c);
+ size_t sum = 0;
+ while(sum <= (PAGESIZE-MIN_BUCKET_SIZE))
+ {
+ size_t b = c->bucket;
+ if(b < BUCKETS)
+ {
+ size_t s = bucketByteSize(b);
+ c_prev = reinterpret_cast<uint8_t*>(c);
+ c = reinterpret_cast<chunk_t*>(((uint8_t*)c) + s);
+ sum += s;
+ ++g_bucket_counts[b];
+ }
+ else
+ {
+ printk("Heaptest: Corruption at %p on page %p."
+ " Owner of %p may have scribbled on it\n",
+ c,g_smallHeapPages[i],c_prev+8);
+ sum = PAGESIZE;
+ break;
+ }
+ }
+ if(sum > PAGESIZE)
+ {
+ printk("Heaptest: Page %p failed consistancy test\n",g_smallHeapPages[i]);
+ }
+ }
+#endif
+}
+
+void* HeapManager::_allocateBig(size_t i_sz)
+{
+ size_t pages = ALIGN_PAGE(i_sz)/PAGESIZE;
+ void* v = PageManager::allocatePage(pages);
+#ifdef HOSTBOOT_DEBUG
+ //printk("HEAPAB %p:%ld:%ld wasted %ld\n",v,pages,i_sz, pages*PAGESIZE - i_sz);
+ __sync_add_and_fetch(&g_big_chunks,pages);
+ if(g_bigheap_highwater < g_big_chunks)
+ g_bigheap_highwater = g_big_chunks;
+#endif
+
+ // If already have unused big_chunk_t object available then use it
+ // otherwise create a new one.
+ big_chunk_t * bc = big_chunk_stack.first();
+ while(bc)
+ {
+ if(bc->page_count == 0)
+ {
+ if(__sync_bool_compare_and_swap(&bc->addr,NULL,v))
+ {
+ bc->page_count = pages;
+ break;
+ }
+ }
+ bc = bc->next;
+ }
+ if(!bc)
+ {
+ bc = new big_chunk_t(v,pages);
+ big_chunk_stack.push(bc);
+ }
+
+ return v;
+}
+
+bool HeapManager::_freeBig(void* i_ptr)
+{
+ // Currently all large allocations fall on a page boundary,
+ // but small allocations never do
+ if(ALIGN_PAGE(reinterpret_cast<uint64_t>(i_ptr)) !=
+ reinterpret_cast<uint64_t>(i_ptr))
+ return false;
+
+ bool result = false;
+ big_chunk_t * bc = big_chunk_stack.first();
+ while(bc)
+ {
+ if(bc->addr == i_ptr)
+ {
+#ifdef HOSTBOOT_DEBUG
+ __sync_sub_and_fetch(&g_big_chunks,bc->page_count);
+#endif
+ size_t page_count = bc->page_count;
+ bc->page_count = 0;
+ bc->addr = NULL;
+ lwsync();
+
+ PageManager::freePage(i_ptr,page_count);
+
+ // no way to safely remove object from chain so leave it
+
+ result = true;
+ break;
+ }
+ bc = bc->next;
+ }
+ return result;
+}
+
diff --git a/src/kernel/makefile b/src/kernel/makefile
index 1ebcf98d5..2ec3db0d9 100644
--- a/src/kernel/makefile
+++ b/src/kernel/makefile
@@ -26,7 +26,7 @@ OBJS = start.o kernel.o console.o pagemgr.o heapmgr.o taskmgr.o cpumgr.o
OBJS += syscall.o scheduler.o spinlock.o exception.o vmmmgr.o timemgr.o
OBJS += futexmgr.o ptmgr.o segmentmgr.o devicesegment.o basesegment.o
OBJS += block.o cpuid.o misc.o msghandler.o blockmsghdlr.o stacksegment.o
-OBJS += softpatch_p7.o
+OBJS += softpatch_p7.o barrier.o
include ${ROOTPATH}/config.mk
diff --git a/src/kernel/pagemgr.C b/src/kernel/pagemgr.C
index 826e1bc4d..82e743ef1 100644
--- a/src/kernel/pagemgr.C
+++ b/src/kernel/pagemgr.C
@@ -24,8 +24,10 @@
#include <kernel/pagemgr.H>
#include <util/singleton.H>
#include <kernel/console.H>
-#include <sys/vfs.h>
#include <arch/ppc.H>
+#include <util/locked/pqueue.H>
+
+size_t PageManager::cv_coalesce_count = 0;
void PageManager::init()
{
@@ -51,14 +53,11 @@ uint64_t PageManager::queryAvail()
PageManager::PageManager() : iv_pagesAvail(0), iv_pagesTotal(0)
{
- // Determine first page of un-allocated memory.
- uint64_t addr = (uint64_t) VFS_LAST_ADDRESS;
- if (0 != (addr % PAGESIZE))
- addr = (addr - (addr % PAGESIZE)) + PAGESIZE;
-
- // Determine number of pages available.
- page_t* page = (page_t*)((void*) addr);
+ // Determine first page of un-allocated memory
+ // and number of pages available.
+ uint64_t addr = firstPageAddr();
size_t length = (MEMLEN - addr) / PAGESIZE;
+ page_t* page = reinterpret_cast<page_t *>(addr);
iv_pagesTotal = length;
// Update statistics.
@@ -70,7 +69,7 @@ PageManager::PageManager() : iv_pagesAvail(0), iv_pagesTotal(0)
(uint64_t)page);
// Populate L3 cache lines.
- uint64_t* cache_line = (uint64_t*) addr;
+ uint64_t* cache_line = reinterpret_cast<uint64_t*>(addr);
uint64_t* end_cache_line = (uint64_t*) VmmManager::FULL_MEM_SIZE;
while (cache_line != end_cache_line)
{
@@ -163,3 +162,76 @@ void PageManager::push_bucket(page_t* p, size_t n)
first_page[n].push(p);
}
+void PageManager::coalesce( void )
+{
+ Singleton<PageManager>::instance()._coalesce();
+}
+
+
+// Coalesce adjacent free memory blocks
+void PageManager::_coalesce( void )
+{
+ // Look at all the "free buckets" and find blocks to merge
+ // Since this is binary, all merges will be from the same free bucket
+ // Each bucket is a stack of non-allocated memory blocks of the same size
+ // Once two blocks are merged they become a single block twice the size.
+ // The source blocks must be removed from the current bucket (stack) and
+ // the new block needs to be pushed onto the next biggest stack.
+ for(size_t bucket = 0; bucket < (BUCKETS-1); ++bucket)
+ {
+ // Move this stack bucket into a priority queue
+ // sorted by address, highest to lowest
+ Util::Locked::PQueue<page_t,page_t*> pq;
+ page_t * p = NULL;
+ while(NULL != (p = first_page[bucket].pop()))
+ {
+ p->key = p;
+ pq.insert(p);
+ }
+
+ while(NULL != (p = pq.remove()))
+ {
+ // p needs to be the even buddy to prevent merging of wrong block.
+ // To determine this, get the index of the block as if the whole
+ // page memory space were blocks of this size.
+ uint64_t p_idx = (reinterpret_cast<uint64_t>(p) - firstPageAddr())/
+ ((1 << bucket)*PAGESIZE);
+ if(0 != (p_idx % 2)) // odd index
+ {
+ first_page[bucket].push(p); // can't merge
+ }
+ else // it's even
+ {
+ // If p can be merged then the next block in pq will be the
+ // match. The address of p also can't be greater than what's
+ // in pq or something is really messed up, therefore if
+ // pq.remove_if() returns something then it's a match.
+ page_t * p_seek = (page_t*)((uint64_t)p +
+ (1 << bucket)*PAGESIZE);
+ page_t * p_next = pq.remove_if(p_seek);
+ if(p_next == p_seek)
+ {
+ // new block is twice the size and goes into the next
+ // bucket size
+ push_bucket(p,bucket+1);
+ ++cv_coalesce_count;
+ }
+ else
+ {
+ // Can't merge p
+ first_page[bucket].push(p);
+
+ if(p_next) // This should be NULL - if not, memory blocks overlap
+ {
+ first_page[bucket].push(p_next);
+ printk("pagemgr::coalesce Expected %p, got %p\n",
+ p_seek, p_next);
+ }
+ }
+ }
+ }
+ }
+ printkd("PAGEMGR coalesced total %ld\n", cv_coalesce_count);
+}
+
+
diff --git a/src/kernel/scheduler.C b/src/kernel/scheduler.C
index 8a8da89a4..10dda6708 100644
--- a/src/kernel/scheduler.C
+++ b/src/kernel/scheduler.C
@@ -81,8 +81,6 @@ void Scheduler::setNextRunnable()
task_t* t = NULL;
cpu_t* cpu = CpuManager::getCurrentCPU();
- CpuManager::executePeriodics(cpu);
-
// Check for ready task in local run-queue, if it exists.
if (NULL != cpu->scheduler_extra)
{
diff --git a/src/kernel/syscall.C b/src/kernel/syscall.C
index 53fdb287e..87cfba6bd 100644
--- a/src/kernel/syscall.C
+++ b/src/kernel/syscall.C
@@ -38,6 +38,7 @@
#include <kernel/msghandler.H>
#include <kernel/vmmmgr.H>
#include <kernel/stacksegment.H>
+#include <kernel/heapmgr.H>
extern "C"
void kernel_execute_decrementer()
@@ -47,8 +48,17 @@ void kernel_execute_decrementer()
TimeManager::checkReleaseTasks(s);
s->returnRunnable();
+ CpuManager::executePeriodics(c);//TODO is there still a potential deadlock?
+
if (CpuManager::isShutdownRequested())
{
+ // The code below could cause a hang during shutdown
+ // The stats can be retrieved from global variables as needed.
+ // This can be uncommented for debug if desired
+#ifdef __MEMSTATS__
+ if(c->master)
+ HeapManager::stats();
+#endif
KernelMisc::shutdown();
}
diff --git a/src/lib/stdlib.C b/src/lib/stdlib.C
index b7cf92455..d9fa9e9fc 100644
--- a/src/lib/stdlib.C
+++ b/src/lib/stdlib.C
@@ -25,63 +25,55 @@
#include <string.h>
#include <kernel/heapmgr.H>
#include <kernel/pagemgr.H>
+#include <kernel/console.H>
+
+
+#ifdef MEM_ALLOC_PROFILE
+// alloc profile
+uint16_t g_0 = 0;
+uint16_t g_8 = 0;
+uint16_t g_16 = 0;
+uint16_t g_32 = 0;
+uint16_t g_64 = 0;
+uint16_t g_128 = 0;
+uint16_t g_256 = 0;
+uint16_t g_512 = 0;
+uint16_t g_1k = 0;
+uint16_t g_2k = 0;
+uint16_t g_big = 0;
+#endif
void* malloc(size_t s)
{
- if (s > HeapManager::MAX_ALLOC_SIZE)
- {
- size_t pages = (s + 8 + (PAGESIZE - 1))
- / PAGESIZE;
- void* v = PageManager::allocatePage(pages);
- size_t* len = (size_t*)v;
- *len = pages << 8;
- len++;
- return len;
- }
- else
- return HeapManager::allocate(s);
+#ifdef MEM_ALLOC_PROFILE
+ if(s == 0) ++g_0;
+ else if (s <= 8) ++g_8;
+ else if (s <= 16) ++g_16;
+ else if (s <= 32) ++g_32;
+ else if (s <= 64) ++g_64;
+ else if (s <= 128) ++g_128;
+ else if (s <= 256) ++g_256;
+ else if (s <= 512) ++g_512;
+ else if (s <= 1024) ++g_1k;
+ else if (s <= 2048) ++g_2k;
+ else ++g_big;
+#endif
+ return HeapManager::allocate(s);
}
+
void free(void* p)
{
if (NULL == p) return;
- size_t* len = (size_t*)p;
- len--;
-
- if ((*len) > 0xff)
- {
- PageManager::freePage(len, (*len) >> 8);
- }
- else
- {
- HeapManager::free(p);
- }
+ HeapManager::free(p);
}
+
void* realloc(void* p, size_t s)
{
if (NULL == p) return malloc(s);
-
- size_t* len = (size_t*)p;
- len--;
-
- size_t cur_size;
- if ((*len) > 0xff)
- {
- cur_size = ((*len) >> 8) * PAGESIZE - 8;
- }
- else
- {
- cur_size = (1 << (*len + 4)) - 8;
- }
-
- if (s <= cur_size)
- return p;
- void* new_p = malloc(s);
- memcpy(new_p, p, cur_size);
- free(p);
+ return HeapManager::realloc(p,s);
+}
- return new_p;
-}
diff --git a/src/makefile b/src/makefile
index 2f1449342..e7cddc664 100644
--- a/src/makefile
+++ b/src/makefile
@@ -28,7 +28,7 @@ IMGS = hbicore hbicore_test
EXTRA_LIDS = dslid
BASE_OBJECTS = console.o spinlock.o string.o stdlib.o assert.o stdio.o \
- builtins.o vfs_init.o heapmgr.o pagemgr.o math.o
+ builtins.o vfs_init.o heapmgr.o pagemgr.o math.o barrier.o
DIRECT_BOOT_OBJECTS = start.o kernel.o taskmgr.o cpumgr.o syscall.o \
scheduler.o exception.o vmmmgr.o timemgr.o \
diff --git a/src/usr/trace/trace.C b/src/usr/trace/trace.C
index 8a8456bb7..f14978854 100644
--- a/src/usr/trace/trace.C
+++ b/src/usr/trace/trace.C
@@ -35,6 +35,7 @@
/******************************************************************************/
#include <trace/interface.H>
#include <stdarg.h>
+#include <stdlib.h>
#include <arch/ppc.H>
#include <kernel/console.H>
#include <kernel/pagemgr.H>
@@ -66,9 +67,6 @@ const uint32_t COMP_NAME_SIZE = 16; // NULL terminated string
// Initial implementation is to allocate a fixed 2KB buffer to each
// component on request.
// NOTE: any change to this value will require change to Trace::initBuffer()
-// since currently malloc() does not work for large allocations/fragmentations
-// and we are using PageManager::allocatePage() to allocate space for two
-// buffers at a time. Once malloc() works, we can remove this constraint.
const uint64_t TRAC_DEFAULT_BUFFER_SIZE = 0x0800; //2KB
// NOTE: This constant should only be changed to an even number for now.
@@ -189,17 +187,14 @@ void Trace::initBuffer(trace_desc_t **o_td, const char* i_comp,
{
//printk("Trace::initBuffer - allocate memory\n");
- // Allocate memory for two buffers.
- // Once malloc() works, we can allocate memory for one
- // one buffer at a time.
- l_td = static_cast<char *>(PageManager::allocatePage());
-
+ // Allocate memory
+ // TODO can't handle i_size yet - everything is coded
+ // around TRAC_DEFAULT_BUFFER_SIZE
+ l_td = static_cast<char *>(malloc(TRAC_DEFAULT_BUFFER_SIZE));
+
g_desc_array[i].td_entry =
reinterpret_cast<trace_desc_t *>(l_td);
- g_desc_array[i+1].td_entry =
- reinterpret_cast<trace_desc_t *>(
- l_td + TRAC_DEFAULT_BUFFER_SIZE);
}
// Initialize the buffer header
@@ -235,12 +230,8 @@ void Trace::initBuffer(trace_desc_t **o_td, const char* i_comp,
//printk("Trace::initBuffer - allocate memory\n");
// Allocate memory for buffer
- l_td = static_cast<char *>(PageManager::allocatePage());
+ l_td = static_cast<char *>(malloc(TRAC_DEFAULT_BUFFER_SIZE));
- // Throw away the last 2KB for now to keep code simple
- // until we decide to add support for variable-sized
- // buffers. Also, once we change to use malloc(),
- // we won't have this problem.
g_desc_array[i].td_entry =
reinterpret_cast<trace_desc_t *>(l_td);
}
OpenPOWER on IntegriCloud