Diffstat (limited to 'libgo/runtime')
26 files changed, 982 insertions, 678 deletions
diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c index 531730654d0..1b9ac2d7964 100644 --- a/libgo/runtime/go-byte-array-to-string.c +++ b/libgo/runtime/go-byte-array-to-string.c @@ -16,7 +16,7 @@ __go_byte_array_to_string (const void* p, size_t len) struct __go_string ret; bytes = (const unsigned char *) p; - retdata = runtime_mallocgc (len, RefNoPointers, 1, 0); + retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0); __builtin_memcpy (retdata, bytes, len); ret.__data = retdata; ret.__length = len; diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c index 46a33dafc2e..c16589f01a2 100644 --- a/libgo/runtime/go-int-array-to-string.c +++ b/libgo/runtime/go-int-array-to-string.c @@ -41,7 +41,7 @@ __go_int_array_to_string (const void* p, size_t len) slen += 4; } - retdata = runtime_mallocgc (slen, RefNoPointers, 1, 0); + retdata = runtime_mallocgc (slen, FlagNoPointers, 1, 0); ret.__data = retdata; ret.__length = slen; diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c index 24d729cf893..af58015ed8f 100644 --- a/libgo/runtime/go-int-to-string.c +++ b/libgo/runtime/go-int-to-string.c @@ -51,7 +51,7 @@ __go_int_to_string (int v) } } - retdata = runtime_mallocgc (len, RefNoPointers, 1, 0); + retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0); __builtin_memcpy (retdata, buf, len); ret.__data = retdata; ret.__length = len; diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c index a592174e50f..8f25c5730ef 100644 --- a/libgo/runtime/go-new.c +++ b/libgo/runtime/go-new.c @@ -17,5 +17,5 @@ __go_new (size_t size) void * __go_new_nopointers (size_t size) { - return runtime_mallocgc (size, RefNoPointers, 1, 1); + return runtime_mallocgc (size, FlagNoPointers, 1, 1); } diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c index 48d64416297..b684779cda8 100644 --- a/libgo/runtime/go-panic.c +++ b/libgo/runtime/go-panic.c @@ -112,7 +112,7 @@ __go_panic_msg (const char* msg) struct __go_empty_interface arg; len = __builtin_strlen (msg); - sdata = runtime_mallocgc (len, RefNoPointers, 0, 0); + sdata = runtime_mallocgc (len, FlagNoPointers, 0, 0); __builtin_memcpy (sdata, msg, len); s.__data = sdata; s.__length = len; diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c index 3b646c81abe..10c565e39a5 100644 --- a/libgo/runtime/go-string-to-byte-array.c +++ b/libgo/runtime/go-string-to-byte-array.c @@ -15,7 +15,7 @@ __go_string_to_byte_array (struct __go_string str) unsigned char *data; struct __go_open_array ret; - data = (unsigned char *) runtime_mallocgc (str.__length, RefNoPointers, 1, 0); + data = (unsigned char *) runtime_mallocgc (str.__length, FlagNoPointers, 1, 0); __builtin_memcpy (data, str.__data, str.__length); ret.__values = (void *) data; ret.__count = str.__length; diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c index 8d7f94f93ab..f59df6739f1 100644 --- a/libgo/runtime/go-string-to-int-array.c +++ b/libgo/runtime/go-string-to-int-array.c @@ -31,7 +31,7 @@ __go_string_to_int_array (struct __go_string str) p += __go_get_rune (p, pend - p, &rune); } - data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), RefNoPointers, + data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), FlagNoPointers, 1, 0); p = str.__data; pd = data; diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c index c0cd356ca63..e4dea9c4690 100644 --- a/libgo/runtime/go-strplus.c 
+++ b/libgo/runtime/go-strplus.c @@ -21,7 +21,7 @@ __go_string_plus (struct __go_string s1, struct __go_string s2) return s1; len = s1.__length + s2.__length; - retdata = runtime_mallocgc (len, RefNoPointers, 1, 0); + retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0); __builtin_memcpy (retdata, s1.__data, s1.__length); __builtin_memcpy (retdata + s1.__length, s2.__data, s2.__length); ret.__data = retdata; diff --git a/libgo/runtime/go-type.h b/libgo/runtime/go-type.h index b1f32850a00..76681217fdf 100644 --- a/libgo/runtime/go-type.h +++ b/libgo/runtime/go-type.h @@ -94,6 +94,10 @@ struct __go_type_descriptor /* A pointer to fields which are only used for some types. */ const struct __go_uncommon_type *__uncommon; + + /* The descriptor for the type which is a pointer to this type. + This may be NULL. */ + const struct __go_type_descriptor *__pointer_to_this; }; /* The information we store for each method of a type. */ diff --git a/libgo/runtime/go-unsafe-pointer.c b/libgo/runtime/go-unsafe-pointer.c index 804360f8a89..9ec18003f1f 100644 --- a/libgo/runtime/go-unsafe-pointer.c +++ b/libgo/runtime/go-unsafe-pointer.c @@ -51,6 +51,8 @@ const struct __go_type_descriptor unsafe_Pointer = /* __reflection */ &reflection_string, /* __uncommon */ + NULL, + /* __pointer_to_this */ NULL }; @@ -90,6 +92,8 @@ const struct __go_ptr_type pointer_unsafe_Pointer = /* __reflection */ &preflection_string, /* __uncommon */ + NULL, + /* __pointer_to_this */ NULL }, /* __element_type */ diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc index d826d479f5c..5f99c4b73a4 100644 --- a/libgo/runtime/malloc.goc +++ b/libgo/runtime/malloc.goc @@ -45,14 +45,13 @@ fastrand1(void) // Small objects are allocated from the per-thread cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. void* -runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) +runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) { int32 sizeclass, rate; MCache *c; uintptr npages; MSpan *s; void *v; - uint32 *ref; if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1)) runtime_throw("malloc/free - deadlock"); @@ -71,12 +70,6 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) mstats.alloc += size; mstats.total_alloc += size; mstats.by_size[sizeclass].nmalloc++; - - if(!runtime_mlookup(v, nil, nil, nil, &ref)) { - // runtime_printf("malloc %D; runtime_mlookup failed\n", (uint64)size); - runtime_throw("malloc runtime_mlookup"); - } - *ref = RefNone | refflag; } else { // TODO(rsc): Report tracebacks for very large allocations. @@ -93,14 +86,15 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) v = (void*)(s->start << PageShift); // setup for mark sweep - s->gcref0 = RefNone | refflag; - ref = &s->gcref0; + runtime_markspan(v, 0, 0, true); } + if(!(flag & FlagNoGC)) + runtime_markallocated(v, size, (flag&FlagNoPointers) != 0); __sync_bool_compare_and_swap(&m->mallocing, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing, 1, 0)) { - if(!(refflag & RefNoProfiling)) + if(!(flag & FlagNoProfiling)) __go_run_goroutine_gc(0); else { // We are being called from the profiler. 
Tell it @@ -110,7 +104,7 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) } } - if(!(refflag & RefNoProfiling) && (rate = runtime_MemProfileRate) > 0) { + if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) { if(size >= (uint32) rate) goto profile; if((uint32) m->mcache->next_sample > size) @@ -121,7 +115,7 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed) rate = 0x3fffffff; m->mcache->next_sample = fastrand1() % (2*rate); profile: - *ref |= RefProfiled; + runtime_setblockspecial(v); runtime_MProf_Malloc(v, size); } } @@ -141,32 +135,37 @@ __go_alloc(uintptr size) void __go_free(void *v) { - int32 sizeclass, size; + int32 sizeclass; MSpan *s; MCache *c; - uint32 prof, *ref; + uint32 prof; + uintptr size; if(v == nil) return; + + // If you change this also change mgc0.c:/^sweepspan, + // which has a copy of the guts of free. if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1)) runtime_throw("malloc/free - deadlock"); - if(!runtime_mlookup(v, nil, nil, &s, &ref)) { + if(!runtime_mlookup(v, nil, nil, &s)) { // runtime_printf("free %p: not an allocated block\n", v); runtime_throw("free runtime_mlookup"); } - prof = *ref & RefProfiled; - *ref = RefFree; + prof = runtime_blockspecial(v); // Find size class for v. sizeclass = s->sizeclass; if(sizeclass == 0) { // Large object. - if(prof) - runtime_MProf_Free(v, s->npages<<PageShift); - mstats.alloc -= s->npages<<PageShift; - runtime_memclr(v, s->npages<<PageShift); + size = s->npages<<PageShift; + *(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed" + // Must mark v freed before calling unmarkspan and MHeap_Free: + // they might coalesce v into other spans and change the bitmap further. + runtime_markfreed(v, size); + runtime_unmarkspan(v, 1<<PageShift); runtime_MHeap_Free(&runtime_mheap, s, 1); } else { // Small object. @@ -174,12 +173,17 @@ __go_free(void *v) size = runtime_class_to_size[sizeclass]; if(size > (int32)sizeof(uintptr)) ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed" - if(prof) - runtime_MProf_Free(v, size); - mstats.alloc -= size; + // Must mark v freed before calling MCache_Free: + // it might coalesce v and other blocks into a bigger span + // and change the bitmap further. + runtime_markfreed(v, size); mstats.by_size[sizeclass].nfree++; runtime_MCache_Free(c, v, sizeclass, size); } + mstats.alloc -= size; + if(prof) + runtime_MProf_Free(v, size); + __sync_bool_compare_and_swap(&m->mallocing, 1, 0); if(__sync_bool_compare_and_swap(&m->gcing, 1, 0)) @@ -187,23 +191,22 @@ __go_free(void *v) } int32 -runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) +runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp) { - uintptr n, nobj, i; + uintptr n, i; byte *p; MSpan *s; mstats.nlookup++; - s = runtime_MHeap_LookupMaybe(&runtime_mheap, (uintptr)v>>PageShift); + s = runtime_MHeap_LookupMaybe(&runtime_mheap, v); if(sp) *sp = s; if(s == nil) { + runtime_checkfreed(v, 1); if(base) *base = nil; if(size) *size = 0; - if(ref) - *ref = 0; return 0; } @@ -214,14 +217,11 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) *base = p; if(size) *size = s->npages<<PageShift; - if(ref) - *ref = &s->gcref0; return 1; } - if((byte*)v >= (byte*)s->gcref) { - // pointers into the gc ref counts - // do not count as pointers. + if((byte*)v >= (byte*)s->limit) { + // pointers past the last block do not count as pointers. 
return 0; } @@ -232,21 +232,6 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref) if(size) *size = n; - // good for error checking, but expensive - if(0) { - nobj = (s->npages << PageShift) / (n + RefcountOverhead); - if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) { - // runtime_printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n", - // s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages); - // runtime_printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n", - // s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift, - // (uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift)); - runtime_throw("bad gcref"); - } - } - if(ref) - *ref = &s->gcref[i]; - return 1; } @@ -278,16 +263,90 @@ runtime_allocmcache(void) extern int32 runtime_sizeof_C_MStats __asm__ ("libgo_runtime.runtime.Sizeof_C_MStats"); +#define MaxArena32 (2U<<30) + void runtime_mallocinit(void) { - runtime_sizeof_C_MStats = sizeof(MStats); + byte *p; + uintptr arena_size, bitmap_size; + extern byte end[]; - runtime_initfintab(); - runtime_Mprof_Init(); + runtime_sizeof_C_MStats = sizeof(MStats); - runtime_SysMemInit(); runtime_InitSizes(); + + // Set up the allocation arena, a contiguous area of memory where + // allocated data will be found. The arena begins with a bitmap large + // enough to hold 4 bits per allocated word. + if(sizeof(void*) == 8) { + // On a 64-bit machine, allocate from a single contiguous reservation. + // 16 GB should be big enough for now. + // + // The code will work with the reservation at any address, but ask + // SysReserve to use 0x000000f800000000 if possible. + // Allocating a 16 GB region takes away 36 bits, and the amd64 + // doesn't let us choose the top 17 bits, so that leaves the 11 bits + // in the middle of 0x00f8 for us to choose. Choosing 0x00f8 means + // that the valid memory addresses will begin 0x00f8, 0x00f9, 0x00fa, 0x00fb. + // None of the bytes f8 f9 fa fb can appear in valid UTF-8, and + // they are otherwise as far from ff (likely a common byte) as possible. + // Choosing 0x00 for the leading 6 bits was more arbitrary, but it + // is not a common ASCII code point either. Using 0x11f8 instead + // caused out of memory errors on OS X during thread allocations. + // These choices are both for debuggability and to reduce the + // odds of the conservative garbage collector not collecting memory + // because some non-pointer block of memory had a bit pattern + // that matched a memory address. + // + // Actually we reserve 17 GB (because the bitmap ends up being 1 GB) + // but it hardly matters: fc is not valid UTF-8 either, and we have to + // allocate 15 GB before we get that far. + arena_size = (uintptr)(16LL<<30); + bitmap_size = arena_size / (sizeof(void*)*8/4); + p = runtime_SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size); + if(p == nil) + runtime_throw("runtime: cannot reserve arena virtual address space"); + } else { + // On a 32-bit machine, we can't typically get away + // with a giant virtual address space reservation. + // Instead we map the memory information bitmap + // immediately after the data segment, large enough + // to handle another 2GB of mappings (256 MB), + // along with a reservation for another 512 MB of memory. 
+ // When that gets used up, we'll start asking the kernel + // for any memory anywhere and hope it's in the 2GB + // following the bitmap (presumably the executable begins + // near the bottom of memory, so we'll have to use up + // most of memory before the kernel resorts to giving out + // memory before the beginning of the text segment). + // + // Alternatively we could reserve 512 MB bitmap, enough + // for 4GB of mappings, and then accept any memory the + // kernel threw at us, but normally that's a waste of 512 MB + // of address space, which is probably too much in a 32-bit world. + bitmap_size = MaxArena32 / (sizeof(void*)*8/4); + arena_size = 512<<20; + + // SysReserve treats the address we ask for, end, as a hint, + // not as an absolute requirement. If we ask for the end + // of the data segment but the operating system requires + // a little more space before we can start allocating, it will + // give out a slightly higher pointer. That's fine. + // Run with what we get back. + p = runtime_SysReserve(end, bitmap_size + arena_size); + if(p == nil) + runtime_throw("runtime: cannot reserve arena virtual address space"); + } + if((uintptr)p & (((uintptr)1<<PageShift)-1)) + runtime_throw("runtime: SysReserve returned unaligned address"); + + runtime_mheap.bitmap = p; + runtime_mheap.arena_start = p + bitmap_size; + runtime_mheap.arena_used = runtime_mheap.arena_start; + runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size; + + // Initialize the rest of the allocator. runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc); m->mcache = runtime_allocmcache(); @@ -295,6 +354,47 @@ runtime_mallocinit(void) runtime_free(runtime_malloc(1)); } +void* +runtime_MHeap_SysAlloc(MHeap *h, uintptr n) +{ + byte *p; + + if(n <= (uintptr)(h->arena_end - h->arena_used)) { + // Keep taking from our reservation. + p = h->arena_used; + runtime_SysMap(p, n); + h->arena_used += n; + runtime_MHeap_MapBits(h); + return p; + } + + // On 64-bit, our reservation is all we have. + if(sizeof(void*) == 8) + return nil; + + // On 32-bit, once the reservation is gone we can + // try to get memory at a location chosen by the OS + // and hope that it is in the range we allocated bitmap for. + p = runtime_SysAlloc(n); + if(p == nil) + return nil; + + if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) { + runtime_printf("runtime: memory allocated by OS not in usable range"); + runtime_SysFree(p, n); + return nil; + } + + if(p+n > h->arena_used) { + h->arena_used = p+n; + if(h->arena_used > h->arena_end) + h->arena_end = h->arena_used; + runtime_MHeap_MapBits(h); + } + + return p; +} + // Runtime stubs. 
void* @@ -303,6 +403,10 @@ runtime_mal(uintptr n) return runtime_mallocgc(n, 0, 1, 1); } +func new(n uint32) (ret *uint8) { + ret = runtime_mal(n); +} + func Alloc(n uintptr) (p *byte) { p = runtime_malloc(n); } @@ -312,7 +416,7 @@ func Free(p *byte) { } func Lookup(p *byte) (base *byte, size uintptr) { - runtime_mlookup(p, &base, &size, nil, nil); + runtime_mlookup(p, &base, &size, nil); } func GC() { @@ -333,7 +437,7 @@ func SetFinalizer(obj Eface, finalizer Eface) { // runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string); goto throw; } - if(!runtime_mlookup(obj.__object, &base, &size, nil, nil) || obj.__object != base) { + if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) { // runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n"); goto throw; } diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h index 369f9b8e771..8131e964e49 100644 --- a/libgo/runtime/malloc.h +++ b/libgo/runtime/malloc.h @@ -19,7 +19,6 @@ // used to manage storage used by the allocator. // MHeap: the malloc heap, managed at page (4096-byte) granularity. // MSpan: a run of pages managed by the MHeap. -// MHeapMap: a mapping from page IDs to MSpans. // MCentral: a shared free list for a given size class. // MCache: a per-thread (in Go, per-M) cache for small objects. // MStats: allocation statistics. @@ -84,7 +83,6 @@ typedef struct FixAlloc FixAlloc; typedef struct MCentral MCentral; typedef struct MHeap MHeap; -typedef struct MHeapMap MHeapMap; typedef struct MSpan MSpan; typedef struct MStats MStats; typedef struct MLink MLink; @@ -99,8 +97,14 @@ typedef uintptr PageID; // address >> PageShift enum { + // Computed constant. The definition of MaxSmallSize and the + // algorithm in msize.c produce some number of different allocation + // size classes. NumSizeClasses is that number. It's needed here + // because there are static arrays of this length; when msize runs its + // size choosing algorithm it double-checks that NumSizeClasses agrees. + NumSizeClasses = 61, + // Tunable constants. - NumSizeClasses = 67, // Number of size classes (must match msize.c) MaxSmallSize = 32<<10, FixAllocChunk = 128<<10, // Chunk size for FixAlloc @@ -108,13 +112,16 @@ enum MaxMCacheSize = 2<<20, // Maximum bytes in one MCache MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap. HeapAllocChunk = 1<<20, // Chunk size for heap growth -}; + // Number of bits in page to span calculations (4k pages). + // On 64-bit, we limit the arena to 16G, so 22 bits suffices. + // On 32-bit, we don't bother limiting anything: 20 bits for 4G. #if __SIZEOF_POINTER__ == 8 -#include "mheapmap64.h" + MHeapMap_Bits = 22, #else -#include "mheapmap32.h" + MHeapMap_Bits = 20, #endif +}; // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) struct MLink @@ -124,7 +131,8 @@ struct MLink // SysAlloc obtains a large chunk of zeroed memory from the // operating system, typically on the order of a hundred kilobytes -// or a megabyte. +// or a megabyte. If the pointer argument is non-nil, the caller +// wants a mapping there or nowhere. // // SysUnused notifies the operating system that the contents // of the memory region are no longer needed and can be reused @@ -134,11 +142,19 @@ struct MLink // SysFree returns it unconditionally; this is only used if // an out-of-memory error has been detected midway through // an allocation. It is okay if SysFree is a no-op. 
+// +// SysReserve reserves address space without allocating memory. +// If the pointer passed to it is non-nil, the caller wants the +// reservation there, but SysReserve can still choose another +// location if that one is unavailable. +// +// SysMap maps previously reserved address space for use. void* runtime_SysAlloc(uintptr nbytes); void runtime_SysFree(void *v, uintptr nbytes); void runtime_SysUnused(void *v, uintptr nbytes); -void runtime_SysMemInit(void); +void runtime_SysMap(void *v, uintptr nbytes); +void* runtime_SysReserve(void *v, uintptr nbytes); // FixAlloc is a simple free-list allocator for fixed size objects. // Malloc uses a FixAlloc wrapped around SysAlloc to manages its @@ -194,7 +210,6 @@ struct MStats uint64 mspan_sys; uint64 mcache_inuse; // MCache structures uint64 mcache_sys; - uint64 heapmap_sys; // heap map uint64 buckhash_sys; // profiling bucket hash table // Statistics about garbage collector. @@ -281,10 +296,7 @@ struct MSpan uint32 ref; // number of allocated objects in this span uint32 sizeclass; // size class uint32 state; // MSpanInUse etc - union { - uint32 *gcref; // sizeclass > 0 - uint32 gcref0; // sizeclass == 0 - }; + byte *limit; // end of data in span }; void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages); @@ -323,11 +335,14 @@ struct MHeap MSpan *allspans; // span lookup - MHeapMap map; + MSpan *map[1<<MHeapMap_Bits]; // range of addresses we might see in the heap - byte *min; - byte *max; + byte *bitmap; + uintptr bitmap_mapped; + byte *arena_start; + byte *arena_used; + byte *arena_end; // central free lists for small size classes. // the union makes sure that the MCentrals are @@ -346,31 +361,31 @@ extern MHeap runtime_mheap; void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr)); MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct); void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct); -MSpan* runtime_MHeap_Lookup(MHeap *h, PageID p); -MSpan* runtime_MHeap_LookupMaybe(MHeap *h, PageID p); -void runtime_MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj); +MSpan* runtime_MHeap_Lookup(MHeap *h, void *v); +MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v); +void runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj); +void* runtime_MHeap_SysAlloc(MHeap *h, uintptr n); +void runtime_MHeap_MapBits(MHeap *h); void* runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); -int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref); +int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s); void runtime_gc(int32 force); - -void* runtime_SysAlloc(uintptr); -void runtime_SysUnused(void*, uintptr); -void runtime_SysFree(void*, uintptr); +void runtime_markallocated(void *v, uintptr n, bool noptr); +void runtime_checkallocated(void *v, uintptr n); +void runtime_markfreed(void *v, uintptr n); +void runtime_checkfreed(void *v, uintptr n); +int32 runtime_checking; +void runtime_markspan(void *v, uintptr size, uintptr n, bool leftover); +void runtime_unmarkspan(void *v, uintptr size); +bool runtime_blockspecial(void*); +void runtime_setblockspecial(void*); enum { - RefcountOverhead = 4, // one uint32 per object - - RefFree = 0, // must be zero - RefStack, // stack segment - don't free and don't scan for pointers - RefNone, // no references - RefSome, // some references - RefNoPointers = 0x80000000U, // flag - no pointers here - RefHasFinalizer = 0x40000000U, // flag - has finalizer - RefProfiled = 
0x20000000U, // flag - is in profiling table - RefNoProfiling = 0x10000000U, // flag - must not profile - RefFlags = 0xFFFF0000U, + // flags to malloc + FlagNoPointers = 1<<0, // no pointers here + FlagNoProfiling = 1<<1, // must not profile + FlagNoGC = 1<<2, // must not free or scan for pointers }; void runtime_Mprof_Init(void); diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c index 81e54b07dbf..cd3d6ca5ede 100644 --- a/libgo/runtime/mcentral.c +++ b/libgo/runtime/mcentral.c @@ -114,13 +114,11 @@ static void MCentral_Free(MCentral *c, void *v) { MSpan *s; - PageID page; - MLink *p, *next; + MLink *p; int32 size; // Find span for v. - page = (uintptr)v >> PageShift; - s = runtime_MHeap_Lookup(&runtime_mheap, page); + s = runtime_MHeap_Lookup(&runtime_mheap, v); if(s == nil || s->ref == 0) runtime_throw("invalid free"); @@ -140,16 +138,8 @@ MCentral_Free(MCentral *c, void *v) if(--s->ref == 0) { size = runtime_class_to_size[c->sizeclass]; runtime_MSpanList_Remove(s); - // The second word of each freed block indicates - // whether it needs to be zeroed. The first word - // is the link pointer and must always be cleared. - for(p=s->freelist; p; p=next) { - next = p->next; - if(size > (int32)sizeof(uintptr) && ((uintptr*)p)[1] != 0) - runtime_memclr((byte*)p, size); - else - p->next = nil; - } + runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); + *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing s->freelist = nil; c->nfree -= (s->npages << PageShift) / size; runtime_unlock(c); @@ -159,7 +149,7 @@ MCentral_Free(MCentral *c, void *v) } void -runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj) +runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj) { int32 size; int32 npages; @@ -168,7 +158,7 @@ runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 * size = runtime_class_to_size[sizeclass]; *npagesp = npages; *sizep = size; - *nobj = (npages << PageShift) / (size + RefcountOverhead); + *nobj = (npages << PageShift) / size; } // Fetch a new span from the heap and @@ -176,7 +166,8 @@ runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 * static bool MCentral_Grow(MCentral *c) { - int32 i, n, npages, size; + int32 i, n, npages; + uintptr size; MLink **tailp, *v; byte *p; MSpan *s; @@ -193,7 +184,7 @@ MCentral_Grow(MCentral *c) // Carve span into sequence of blocks. 
tailp = &s->freelist; p = (byte*)(s->start << PageShift); - s->gcref = (uint32*)(p + size*n); + s->limit = p + size*n; for(i=0; i<n; i++) { v = (MLink*)p; *tailp = v; @@ -201,6 +192,7 @@ MCentral_Grow(MCentral *c) p += size; } *tailp = nil; + runtime_markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift)); runtime_lock(c); c->nfree += n; diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c index 4d6c7420907..f62a4d37ee7 100644 --- a/libgo/runtime/mem.c +++ b/libgo/runtime/mem.c @@ -38,12 +38,11 @@ runtime_SysAlloc(uintptr n) p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0); if (p == MAP_FAILED) { if(errno == EACCES) { - printf("mmap: access denied\n"); - printf("If you're running SELinux, enable execmem for this process.\n"); - } else { - printf("mmap: errno=%d\n", errno); + printf("runtime: mmap: access denied\n"); + printf("if you're running SELinux, enable execmem for this process.\n"); + exit(2); } - exit(2); + return nil; } return p; } @@ -63,14 +62,61 @@ runtime_SysFree(void *v, uintptr n) runtime_munmap(v, n); } +void* +runtime_SysReserve(void *v, uintptr n) +{ + int fd = -1; + + // On 64-bit, people with ulimit -v set complain if we reserve too + // much address space. Instead, assume that the reservation is okay + // and check the assumption in SysMap. + if(sizeof(void*) == 8) + return v; + +#ifdef USE_DEV_ZERO + if (dev_zero == -1) { + dev_zero = open("/dev/zero", O_RDONLY); + if (dev_zero < 0) { + printf("open /dev/zero: errno=%d\n", errno); + exit(2); + } + } + fd = dev_zero; +#endif + + return runtime_mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, fd, 0); +} + void -runtime_SysMemInit(void) +runtime_SysMap(void *v, uintptr n) { - // Code generators assume that references to addresses - // on the first page will fault. Map the page explicitly with - // no permissions, to head off possible bugs like the system - // allocating that page as the virtual address space fills. - // Ignore any error, since other systems might be smart - // enough to never allow anything there. - runtime_mmap(nil, 4096, PROT_NONE, MAP_FIXED|MAP_ANON|MAP_PRIVATE, -1, 0); + void *p; + int fd = -1; + + mstats.sys += n; + +#ifdef USE_DEV_ZERO + if (dev_zero == -1) { + dev_zero = open("/dev/zero", O_RDONLY); + if (dev_zero < 0) { + printf("open /dev/zero: errno=%d\n", errno); + exit(2); + } + } + fd = dev_zero; +#endif + + // On 64-bit, we don't actually have v reserved, so tread carefully. 
+ if(sizeof(void*) == 8) { + p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0); + if(p != v) { + runtime_printf("runtime: address space conflict: map(%p) = %p\n", v, p); + runtime_throw("runtime: address space conflict"); + } + return; + } + + p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0); + if(p != v) + runtime_throw("runtime: cannot map pages in arena address space"); } diff --git a/libgo/runtime/mem_posix_memalign.c b/libgo/runtime/mem_posix_memalign.c index 3855dfcf185..2318be8da11 100644 --- a/libgo/runtime/mem_posix_memalign.c +++ b/libgo/runtime/mem_posix_memalign.c @@ -32,7 +32,13 @@ runtime_SysFree(void *v, uintptr n) free(v); } +void* +runtime_SysReserve(void *v, uintptr n) +{ + return runtime_SysAlloc(n); +} + void -runtime_SysMemInit(void) +runtime_SysMap(void *v, uintptr n) { } diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c index 23c0d7a1663..04d58dddda9 100644 --- a/libgo/runtime/mfinal.c +++ b/libgo/runtime/mfinal.c @@ -5,6 +5,9 @@ #include "runtime.h" #include "malloc.h" +// Lock to protect finalizer data structures. +// Cannot reuse mheap.Lock because the finalizer +// maintenance requires allocation. static Lock finlock; void @@ -95,7 +98,6 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft) { Fintab newtab; int32 i; - uint32 *ref; byte *base; Finalizer *e; @@ -110,25 +112,22 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft) runtime_throw("finalizer deadlock"); runtime_lock(&finlock); - if(!runtime_mlookup(p, &base, nil, nil, &ref) || p != base) { + if(!runtime_mlookup(p, &base, nil, nil) || p != base) { runtime_unlock(&finlock); __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); runtime_throw("addfinalizer on invalid pointer"); } if(f == nil) { - if(*ref & RefHasFinalizer) { - lookfintab(&fintab, p, 1); - *ref &= ~RefHasFinalizer; - } + lookfintab(&fintab, p, 1); goto unlock; } - if(*ref & RefHasFinalizer) { + if(lookfintab(&fintab, p, 0)) { runtime_unlock(&finlock); __sync_bool_compare_and_swap(&m->holds_finlock, 1, 0); runtime_throw("double finalizer"); } - *ref |= RefHasFinalizer; + runtime_setblockspecial(p); if(fintab.nkey >= fintab.max/2+fintab.max/4) { // keep table at most 3/4 full: @@ -144,7 +143,7 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft) newtab.max *= 3; } - newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1); + newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1); newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); for(i=0; i<fintab.max; i++) { diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c index f2703ab0263..27fc3cdcc4d 100644 --- a/libgo/runtime/mgc0.c +++ b/libgo/runtime/mgc0.c @@ -2,28 +2,65 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Garbage collector -- step 0. -// -// Stop the world, mark and sweep garbage collector. -// NOT INTENDED FOR PRODUCTION USE. -// -// A mark and sweep collector provides a way to exercise -// and test the memory allocator and the stack walking machinery -// without also needing to get reference counting -// exactly right. +// Garbage collector. #include "runtime.h" #include "malloc.h" enum { - Debug = 0 + Debug = 0, + UseCas = 1, + PtrSize = sizeof(void*), + + // Four bits per word (see #defines below). 
+ wordsPerBitmapWord = sizeof(void*)*8/4, + bitShift = sizeof(void*)*8/4, }; -typedef struct BlockList BlockList; -struct BlockList +// Bits in per-word bitmap. +// #defines because enum might not be able to hold the values. +// +// Each word in the bitmap describes wordsPerBitmapWord words +// of heap memory. There are 4 bitmap bits dedicated to each heap word, +// so on a 64-bit system there is one bitmap word per 16 heap words. +// The bits in the word are packed together by type first, then by +// heap location, so each 64-bit bitmap word consists of, from top to bottom, +// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits, +// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits. +// This layout makes it easier to iterate over the bits of a given type. +// +// The bitmap starts at mheap.arena_start and extends *backward* from +// there. On a 64-bit system the off'th word in the arena is tracked by +// the off/16+1'th word before mheap.arena_start. (On a 32-bit system, +// the only difference is that the divisor is 8.) +// +// To pull out the bits corresponding to a given pointer p, we use: +// +// off = p - (uintptr*)mheap.arena_start; // word offset +// b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1; +// shift = off % wordsPerBitmapWord +// bits = *b >> shift; +// /* then test bits & bitAllocated, bits & bitMarked, etc. */ +// +#define bitAllocated ((uintptr)1<<(bitShift*0)) +#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ +#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ +#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ +#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ + +#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial) + +static uint64 nlookup; +static uint64 nsizelookup; +static uint64 naddrlookup; +static int32 gctrace; + +typedef struct Workbuf Workbuf; +struct Workbuf { - byte *obj; - uintptr size; + Workbuf *next; + uintptr nw; + byte *w[2048-2]; }; static bool finstarted; @@ -31,83 +68,265 @@ static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER; static Finalizer *finq; static int32 fingwait; -static BlockList *bl, *ebl; static void runfinq(void*); - -enum { - PtrSize = sizeof(void*) -}; - +static Workbuf* getempty(Workbuf*); +static Workbuf* getfull(Workbuf*); + +// scanblock scans a block of n bytes starting at pointer b for references +// to other objects, scanning any it finds recursively until there are no +// unscanned objects left. Instead of using an explicit recursion, it keeps +// a work list in the Workbuf* structures and loops in the main function +// body. Keeping an explicit work list is easier on the stack allocator and +// more efficient. static void scanblock(byte *b, int64 n) { - int32 off; - void *obj; - uintptr size; - uint32 *refp, ref; + byte *obj, *arena_start, *p; void **vp; - int64 i; - BlockList *w; - - w = bl; - w->obj = b; - w->size = n; - w++; + uintptr size, *bitp, bits, shift, i, j, x, xbits, off; + MSpan *s; + PageID k; + void **bw, **w, **ew; + Workbuf *wbuf; - while(w > bl) { - w--; - b = w->obj; - n = w->size; + // Memory arena parameters. 
+ arena_start = runtime_mheap.arena_start; + + wbuf = nil; // current work buffer + ew = nil; // end of work buffer + bw = nil; // beginning of work buffer + w = nil; // current pointer into work buffer + + // Align b to a word boundary. + off = (uintptr)b & (PtrSize-1); + if(off != 0) { + b += PtrSize - off; + n -= PtrSize - off; + } + for(;;) { + // Each iteration scans the block b of length n, queueing pointers in + // the work buffer. if(Debug > 1) runtime_printf("scanblock %p %lld\n", b, (long long) n); - off = (uint32)(uintptr)b & (PtrSize-1); - if(off) { - b += PtrSize - off; - n -= PtrSize - off; - } - + vp = (void**)b; n /= PtrSize; - for(i=0; i<n; i++) { - obj = vp[i]; - if(obj == nil) + for(i=0; i<(uintptr)n; i++) { + obj = (byte*)vp[i]; + + // Words outside the arena cannot be pointers. + if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used) continue; - if(runtime_mheap.min <= (byte*)obj && (byte*)obj < runtime_mheap.max) { - if(runtime_mlookup(obj, (byte**)&obj, &size, nil, &refp)) { - ref = *refp; - switch(ref & ~RefFlags) { - case RefNone: - if(Debug > 1) - runtime_printf("found at %p: ", &vp[i]); - *refp = RefSome | (ref & RefFlags); - if(!(ref & RefNoPointers)) { - if(w >= ebl) - runtime_throw("scanblock: garbage collection stack overflow"); - w->obj = obj; - w->size = size; - w++; - } - break; - } + + // obj may be a pointer to a live object. + // Try to find the beginning of the object. + + // Round down to word boundary. + obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); + + // Find bits for this word. + off = (uintptr*)obj - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Pointing at the beginning of a block? + if((bits & (bitAllocated|bitBlockBoundary)) != 0) + goto found; + + // Pointing just past the beginning? + // Scan backward a little to find a block boundary. + for(j=shift; j-->0; ) { + if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { + obj = (byte*)obj - (shift-j)*PtrSize; + shift = j; + bits = xbits>>shift; + goto found; } } + + // Otherwise consult span table to find beginning. + // (Manually inlined copy of MHeap_LookupMaybe.) + nlookup++; + naddrlookup++; + k = (uintptr)obj>>PageShift; + x = k; + if(sizeof(void*) == 8) + x -= (uintptr)arena_start>>PageShift; + s = runtime_mheap.map[x]; + if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + continue; + p = (byte*)((uintptr)s->start<<PageShift); + if(s->sizeclass == 0) { + obj = p; + } else { + if((byte*)obj >= (byte*)s->limit) + continue; + size = runtime_class_to_size[s->sizeclass]; + int32 i = ((byte*)obj - p)/size; + obj = p+i*size; + } + + // Now that we know the object header, reload bits. + off = (uintptr*)obj - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + found: + // Now we have bits, bitp, and shift correct for + // obj pointing at the base of the object. + // If not allocated or already marked, done. + if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0) + continue; + *bitp |= bitMarked<<shift; + + // If object has no pointers, don't need to scan further. + if((bits & bitNoPointers) != 0) + continue; + + // If buffer is full, get a new one. + if(w >= ew) { + wbuf = getempty(wbuf); + bw = (void**)wbuf->w; + w = bw; + ew = bw + nelem(wbuf->w); + } + *w++ = obj; } + + // Done scanning [b, b+n). 
Prepare for the next iteration of + // the loop by setting b and n to the parameters for the next block. + + // Fetch b from the work buffers. + if(w <= bw) { + // Emptied our buffer: refill. + wbuf = getfull(wbuf); + if(wbuf == nil) + break; + bw = (void**)wbuf->w; + ew = (void**)(wbuf->w + nelem(wbuf->w)); + w = bw+wbuf->nw; + } + b = *--w; + + // Figure out n = size of b. Start by loading bits for b. + off = (uintptr*)b - (uintptr*)arena_start; + bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + xbits = *bitp; + bits = xbits >> shift; + + // Might be small; look for nearby block boundary. + // A block boundary is marked by either bitBlockBoundary + // or bitAllocated being set (see notes near their definition). + enum { + boundary = bitBlockBoundary|bitAllocated + }; + // Look for a block boundary both after and before b + // in the same bitmap word. + // + // A block boundary j words after b is indicated by + // bits>>j & boundary + // assuming shift+j < bitShift. (If shift+j >= bitShift then + // we'll be bleeding other bit types like bitMarked into our test.) + // Instead of inserting the conditional shift+j < bitShift into the loop, + // we can let j range from 1 to bitShift as long as we first + // apply a mask to keep only the bits corresponding + // to shift+j < bitShift aka j < bitShift-shift. + bits &= (boundary<<(bitShift-shift)) - boundary; + + // A block boundary j words before b is indicated by + // xbits>>(shift-j) & boundary + // (assuming shift >= j). There is no cleverness here + // avoid the test, because when j gets too large the shift + // turns negative, which is undefined in C. + + for(j=1; j<bitShift; j++) { + if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) { + n = j*PtrSize; + goto scan; + } + } + + // Fall back to asking span about size class. + // (Manually inlined copy of MHeap_Lookup.) + nlookup++; + nsizelookup++; + x = (uintptr)b>>PageShift; + if(sizeof(void*) == 8) + x -= (uintptr)arena_start>>PageShift; + s = runtime_mheap.map[x]; + if(s->sizeclass == 0) + n = s->npages<<PageShift; + else + n = runtime_class_to_size[s->sizeclass]; + scan:; + } +} + +static struct { + Workbuf *full; + Workbuf *empty; + byte *chunk; + uintptr nchunk; +} work; + +// Get an empty work buffer off the work.empty list, +// allocating new buffers as needed. +static Workbuf* +getempty(Workbuf *b) +{ + if(b != nil) { + b->nw = nelem(b->w); + b->next = work.full; + work.full = b; + } + b = work.empty; + if(b != nil) { + work.empty = b->next; + return b; + } + + if(work.nchunk < sizeof *b) { + work.nchunk = 1<<20; + work.chunk = runtime_SysAlloc(work.nchunk); } + b = (Workbuf*)work.chunk; + work.chunk += sizeof *b; + work.nchunk -= sizeof *b; + return b; } +// Get a full work buffer off the work.full list, or return nil. +static Workbuf* +getfull(Workbuf *b) +{ + if(b != nil) { + b->nw = 0; + b->next = work.empty; + work.empty = b; + } + b = work.full; + if(b != nil) + work.full = b->next; + return b; +} + +// Scanstack calls scanblock on each of gp's stack segments. static void markfin(void *v) { uintptr size; - uint32 *refp; size = 0; - refp = nil; - if(!runtime_mlookup(v, (byte**)&v, &size, nil, &refp) || !(*refp & RefHasFinalizer)) + if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v)) runtime_throw("mark - finalizer inconsistency"); - + // do not mark the finalizer block itself. just mark the things it points at. 
scanblock(v, size); } @@ -131,32 +350,12 @@ __go_register_gc_roots (struct root_list* r) roots = r; } +// Mark static void mark(void) { - uintptr blsize, nobj; struct root_list *pl; - // Figure out how big an object stack we need. - // Get a new one if we need more than we have - // or we need significantly less than we have. - nobj = mstats.heap_objects; - if(nobj > (uintptr)(ebl - bl) || nobj < (uintptr)(ebl-bl)/4) { - if(bl != nil) - runtime_SysFree(bl, (byte*)ebl - (byte*)bl); - - // While we're allocated a new object stack, - // add 20% headroom and also round up to - // the nearest page boundary, since mmap - // will anyway. - nobj = nobj * 12/10; - blsize = nobj * sizeof *bl; - blsize = (blsize + 4095) & ~4095; - nobj = blsize / sizeof *bl; - bl = runtime_SysAlloc(blsize); - ebl = bl + nobj; - } - for(pl = roots; pl != nil; pl = pl->next) { struct root* pr = &pl->roots[0]; while(1) { @@ -179,97 +378,85 @@ mark(void) runtime_walkfintab(markfin, scanblock); } -// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome +// Sweep frees or calls finalizers for blocks not marked in the mark phase. +// It clears the mark bits in preparation for the next GC round. static void -sweepspan(MSpan *s) +sweep(void) { - int32 n, npages, size; + MSpan *s; + int32 cl, n, npages; + uintptr size; byte *p; - uint32 ref, *gcrefp, *gcrefep; MCache *c; Finalizer *f; - p = (byte*)(s->start << PageShift); - if(s->sizeclass == 0) { - // Large block. - ref = s->gcref0; - switch(ref & ~(RefFlags^RefHasFinalizer)) { - case RefNone: - // Free large object. - mstats.alloc -= s->npages<<PageShift; - mstats.nfree++; - runtime_memclr(p, s->npages<<PageShift); - if(ref & RefProfiled) - runtime_MProf_Free(p, s->npages<<PageShift); - s->gcref0 = RefFree; - runtime_MHeap_Free(&runtime_mheap, s, 1); - break; - case RefNone|RefHasFinalizer: - f = runtime_getfinalizer(p, 1); - if(f == nil) - runtime_throw("finalizer inconsistency"); - f->arg = p; - f->next = finq; - finq = f; - ref &= ~RefHasFinalizer; - // fall through - case RefSome: - case RefSome|RefHasFinalizer: - s->gcref0 = RefNone | (ref&RefFlags); - break; + for(s = runtime_mheap.allspans; s != nil; s = s->allnext) { + if(s->state != MSpanInUse) + continue; + + p = (byte*)(s->start << PageShift); + cl = s->sizeclass; + if(cl == 0) { + size = s->npages<<PageShift; + n = 1; + } else { + // Chunk full of small blocks. + size = runtime_class_to_size[cl]; + npages = runtime_class_to_allocnpages[cl]; + n = (npages << PageShift) / size; } - return; - } + + // sweep through n objects of given size starting at p. + for(; n > 0; n--, p += size) { + uintptr off, *bitp, shift, bits; - // Chunk full of small blocks. - runtime_MGetSizeClassInfo(s->sizeclass, &size, &npages, &n); - gcrefp = s->gcref; - gcrefep = s->gcref + n; - for(; gcrefp < gcrefep; gcrefp++, p += size) { - ref = *gcrefp; - if(ref < RefNone) // RefFree or RefStack - continue; - switch(ref & ~(RefFlags^RefHasFinalizer)) { - case RefNone: - // Free small object. - if(ref & RefProfiled) + off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; + bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + bits = *bitp>>shift; + + if((bits & bitAllocated) == 0) + continue; + + if((bits & bitMarked) != 0) { + *bitp &= ~(bitMarked<<shift); + continue; + } + + if((bits & bitSpecial) != 0) { + // Special means it has a finalizer or is being profiled. 
+ f = runtime_getfinalizer(p, 1); + if(f != nil) { + f->arg = p; + f->next = finq; + finq = f; + continue; + } runtime_MProf_Free(p, size); - *gcrefp = RefFree; - c = m->mcache; - if(size > (int32)sizeof(uintptr)) - ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + } + + // Mark freed; restore block boundary bit. + *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + + if(s->sizeclass == 0) { + // Free large span. + runtime_unmarkspan(p, 1<<PageShift); + *(uintptr*)p = 1; // needs zeroing + runtime_MHeap_Free(&runtime_mheap, s, 1); + } else { + // Free small object. + c = m->mcache; + if(size > sizeof(uintptr)) + ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" + mstats.by_size[s->sizeclass].nfree++; + runtime_MCache_Free(c, p, s->sizeclass, size); + } mstats.alloc -= size; mstats.nfree++; - mstats.by_size[s->sizeclass].nfree++; - runtime_MCache_Free(c, p, s->sizeclass, size); - break; - case RefNone|RefHasFinalizer: - f = runtime_getfinalizer(p, 1); - if(f == nil) - runtime_throw("finalizer inconsistency"); - f->arg = p; - f->next = finq; - finq = f; - ref &= ~RefHasFinalizer; - // fall through - case RefSome: - case RefSome|RefHasFinalizer: - *gcrefp = RefNone | (ref&RefFlags); - break; } } } -static void -sweep(void) -{ - MSpan *s; - - for(s = runtime_mheap.allspans; s != nil; s = s->allnext) - if(s->state == MSpanInUse) - sweepspan(s); -} - static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER; // Initialized from $GOGC. GOGC=off means no gc. @@ -286,7 +473,8 @@ static int32 gcpercent = -2; void runtime_gc(int32 force __attribute__ ((unused))) { - int64 t0, t1; + int64 t0, t1, t2, t3; + uint64 heap0, heap1, obj0, obj1; char *p; Finalizer *fp; @@ -309,29 +497,65 @@ runtime_gc(int32 force __attribute__ ((unused))) gcpercent = -1; else gcpercent = runtime_atoi(p); + + p = runtime_getenv("GOGCTRACE"); + if(p != nil) + gctrace = runtime_atoi(p); } if(gcpercent < 0) return; pthread_mutex_lock(&finqlock); pthread_mutex_lock(&gcsema); - m->locks++; // disable gc during the mallocs in newproc + if(!force && mstats.heap_alloc < mstats.next_gc) { + pthread_mutex_unlock(&gcsema); + pthread_mutex_unlock(&finqlock); + return; + } + t0 = runtime_nanotime(); + nlookup = 0; + nsizelookup = 0; + naddrlookup = 0; + + m->gcing = 1; runtime_stoptheworld(); - if(force || mstats.heap_alloc >= mstats.next_gc) { - __go_cachestats(); - mark(); - sweep(); - __go_stealcache(); - mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; - } + if(runtime_mheap.Lock.key != 0) + runtime_throw("runtime_mheap locked during gc"); + __go_cachestats(); + heap0 = mstats.heap_alloc; + obj0 = mstats.nmalloc - mstats.nfree; + + mark(); t1 = runtime_nanotime(); + sweep(); + t2 = runtime_nanotime(); + __go_stealcache(); + + mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; + m->gcing = 0; + + m->locks++; // disable gc during the mallocs in newproc + + heap1 = mstats.heap_alloc; + obj1 = mstats.nmalloc - mstats.nfree; + + t3 = runtime_nanotime(); + mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; + mstats.pause_total_ns += t3 - t0; mstats.numgc++; - mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0; - mstats.pause_total_ns += t1 - t0; if(mstats.debuggc) - runtime_printf("pause %llu\n", (unsigned long long)t1-t0); + runtime_printf("pause %llu\n", (unsigned long long)t3-t0); + + if(gctrace) { + runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n", + mstats.numgc, 
(unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000, + (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1, + (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree, + (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup); + } + pthread_mutex_unlock(&gcsema); runtime_starttheworld(); @@ -350,6 +574,9 @@ runtime_gc(int32 force __attribute__ ((unused))) } m->locks--; pthread_mutex_unlock(&finqlock); + + if(gctrace > 1 && !force) + runtime_gc(1); } static void @@ -385,6 +612,202 @@ runfinq(void* dummy) } } +#define runtime_gomaxprocs 2 + +// mark the block at v of size n as allocated. +// If noptr is true, mark it as having no pointers. +void +runtime_markallocated(void *v, uintptr n, bool noptr) +{ + uintptr *b, obits, bits, off, shift; + + // if(0) + // runtime_printf("markallocated %p+%p\n", v, n); + + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + runtime_throw("markallocated: bad pointer"); + + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + for(;;) { + obits = *b; + bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift); + if(noptr) + bits |= bitNoPointers<<shift; + if(runtime_gomaxprocs == 1) { + *b = bits; + break; + } else { + // gomaxprocs > 1: use atomic op + if(runtime_casp((void**)b, (void*)obits, (void*)bits)) + break; + } + } +} + +// mark the block at v of size n as freed. +void +runtime_markfreed(void *v, uintptr n) +{ + uintptr *b, obits, bits, off, shift; + + // if(0) + // runtime_printf("markallocated %p+%p\n", v, n); + + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + runtime_throw("markallocated: bad pointer"); + + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + for(;;) { + obits = *b; + bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + if(runtime_gomaxprocs == 1) { + *b = bits; + break; + } else { + // gomaxprocs > 1: use atomic op + if(runtime_casp((void**)b, (void*)obits, (void*)bits)) + break; + } + } +} + +// check that the block at v of size n is marked freed. +void +runtime_checkfreed(void *v, uintptr n) +{ + uintptr *b, bits, off, shift; + + if(!runtime_checking) + return; + + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + return; // not allocated, so okay + + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + bits = *b>>shift; + if((bits & bitAllocated) != 0) { + runtime_printf("checkfreed %p+%p: off=%p have=%p\n", + v, (void*)n, (void*)off, (void*)(bits & bitMask)); + runtime_throw("checkfreed: not freed"); + } +} + +// mark the span of memory at v as having n blocks of the given size. +// if leftover is true, there is left over space at the end of the span. 
+void +runtime_markspan(void *v, uintptr size, uintptr n, bool leftover) +{ + uintptr *b, off, shift; + byte *p; + + if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + runtime_throw("markspan: bad pointer"); + + p = v; + if(leftover) // mark a boundary just past end of last block too + n++; + for(; n-- > 0; p += size) { + // Okay to use non-atomic ops here, because we control + // the entire span, and each bitmap word has bits for only + // one span, so no other goroutines are changing these + // bitmap words. + off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); + } +} + +// unmark the span of memory at v of length n bytes. +void +runtime_unmarkspan(void *v, uintptr n) +{ + uintptr *p, *b, off; + + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + runtime_throw("markspan: bad pointer"); + + p = v; + off = p - (uintptr*)runtime_mheap.arena_start; // word offset + if(off % wordsPerBitmapWord != 0) + runtime_throw("markspan: unaligned pointer"); + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + n /= PtrSize; + if(n%wordsPerBitmapWord != 0) + runtime_throw("unmarkspan: unaligned length"); + // Okay to use non-atomic ops here, because we control + // the entire span, and each bitmap word has bits for only + // one span, so no other goroutines are changing these + // bitmap words. + n /= wordsPerBitmapWord; + while(n-- > 0) + *b-- = 0; +} + +bool +runtime_blockspecial(void *v) +{ + uintptr *b, off, shift; + + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + return (*b & (bitSpecial<<shift)) != 0; +} + +void +runtime_setblockspecial(void *v) +{ + uintptr *b, off, shift, bits, obits; + + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; + shift = off % wordsPerBitmapWord; + + for(;;) { + obits = *b; + bits = obits | (bitSpecial<<shift); + if(runtime_gomaxprocs == 1) { + *b = bits; + break; + } else { + // gomaxprocs > 1: use atomic op + if(runtime_casp((void**)b, (void*)obits, (void*)bits)) + break; + } + } +} + +void +runtime_MHeap_MapBits(MHeap *h) +{ + // Caller has added extra mappings to the arena. + // Add extra mappings of bitmap words as needed. + // We allocate extra bitmap pieces in chunks of bitmapChunk. 
+ enum { + bitmapChunk = 8192 + }; + uintptr n; + + n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; + n = (n+bitmapChunk-1) & ~(bitmapChunk-1); + if(h->bitmap_mapped >= n) + return; + + runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped); + h->bitmap_mapped = n; +} + void __go_enable_gc() { diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c index 52c6d8c1baa..b36df258818 100644 --- a/libgo/runtime/mheap.c +++ b/libgo/runtime/mheap.c @@ -42,7 +42,6 @@ runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr)) runtime_initlock(h); runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h); runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil); - runtime_MHeapMap_Init(&h->map, alloc); // h->mapcache needs no init for(i=0; i<nelem(h->free); i++) runtime_MSpanList_Init(&h->free[i]); @@ -80,6 +79,7 @@ MHeap_AllocLocked(MHeap *h, uintptr npage, int32 sizeclass) { uintptr n; MSpan *s, *t; + PageID p; // Try in fixed-size lists up to max. for(n=npage; n < nelem(h->free); n++) { @@ -113,18 +113,29 @@ HaveSpan: mstats.mspan_sys = h->spanalloc.sys; runtime_MSpan_Init(t, s->start + npage, s->npages - npage); s->npages = npage; - runtime_MHeapMap_Set(&h->map, t->start - 1, s); - runtime_MHeapMap_Set(&h->map, t->start, t); - runtime_MHeapMap_Set(&h->map, t->start + t->npages - 1, t); + p = t->start; + if(sizeof(void*) == 8) + p -= ((uintptr)h->arena_start>>PageShift); + if(p > 0) + h->map[p-1] = s; + h->map[p] = t; + h->map[p+t->npages-1] = t; + *(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark t->state = MSpanInUse; MHeap_FreeLocked(h, t); } + if(*(uintptr*)(s->start<<PageShift) != 0) + runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift); + // Record span info, because gc needs to be // able to map interior pointer to containing span. s->sizeclass = sizeclass; + p = s->start; + if(sizeof(void*) == 8) + p -= ((uintptr)h->arena_start>>PageShift); for(n=0; n<npage; n++) - runtime_MHeapMap_Set(&h->map, s->start+n, s); + h->map[p+n] = s; return s; } @@ -162,6 +173,7 @@ MHeap_Grow(MHeap *h, uintptr npage) uintptr ask; void *v; MSpan *s; + PageID p; // Ask for a big chunk, to reduce the number of mappings // the operating system needs to track; also amortizes @@ -169,68 +181,72 @@ MHeap_Grow(MHeap *h, uintptr npage) // Allocate a multiple of 64kB (16 pages). npage = (npage+15)&~15; ask = npage<<PageShift; - if(ask < HeapAllocChunk) + if(ask > (uintptr)(h->arena_end - h->arena_used)) + return false; + if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used) ask = HeapAllocChunk; - v = runtime_SysAlloc(ask); + v = runtime_MHeap_SysAlloc(h, ask); if(v == nil) { if(ask > (npage<<PageShift)) { ask = npage<<PageShift; - v = runtime_SysAlloc(ask); + v = runtime_MHeap_SysAlloc(h, ask); } if(v == nil) return false; } mstats.heap_sys += ask; - if((byte*)v < h->min || h->min == nil) - h->min = v; - if((byte*)v+ask > h->max) - h->max = (byte*)v+ask; - - // NOTE(rsc): In tcmalloc, if we've accumulated enough - // system allocations, the heap map gets entirely allocated - // in 32-bit mode. (In 64-bit mode that's not practical.) - if(!runtime_MHeapMap_Preallocate(&h->map, ((uintptr)v>>PageShift) - 1, (ask>>PageShift) + 2)) { - runtime_SysFree(v, ask); - return false; - } - // Create a fake "in use" span and free it, so that the // right coalescing happens. 
s = runtime_FixAlloc_Alloc(&h->spanalloc); mstats.mspan_inuse = h->spanalloc.inuse; mstats.mspan_sys = h->spanalloc.sys; runtime_MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift); - runtime_MHeapMap_Set(&h->map, s->start, s); - runtime_MHeapMap_Set(&h->map, s->start + s->npages - 1, s); + p = s->start; + if(sizeof(void*) == 8) + p -= ((uintptr)h->arena_start>>PageShift); + h->map[p] = s; + h->map[p + s->npages - 1] = s; s->state = MSpanInUse; MHeap_FreeLocked(h, s); return true; } -// Look up the span at the given page number. -// Page number is guaranteed to be in map +// Look up the span at the given address. +// Address is guaranteed to be in map // and is guaranteed to be start or end of span. MSpan* -runtime_MHeap_Lookup(MHeap *h, PageID p) +runtime_MHeap_Lookup(MHeap *h, void *v) { - return runtime_MHeapMap_Get(&h->map, p); + uintptr p; + + p = (uintptr)v; + if(sizeof(void*) == 8) + p -= (uintptr)h->arena_start; + return h->map[p >> PageShift]; } -// Look up the span at the given page number. -// Page number is *not* guaranteed to be in map +// Look up the span at the given address. +// Address is *not* guaranteed to be in map // and may be anywhere in the span. // Map entries for the middle of a span are only // valid for allocated spans. Free spans may have // other garbage in their middles, so we have to // check for that. MSpan* -runtime_MHeap_LookupMaybe(MHeap *h, PageID p) +runtime_MHeap_LookupMaybe(MHeap *h, void *v) { MSpan *s; + PageID p, q; - s = runtime_MHeapMap_GetMaybe(&h->map, p); + if((byte*)v < h->arena_start || (byte*)v >= h->arena_used) + return nil; + p = (uintptr)v>>PageShift; + q = p; + if(sizeof(void*) == 8) + q -= (uintptr)h->arena_start >> PageShift; + s = h->map[q]; if(s == nil || p < s->start || p - s->start >= s->npages) return nil; if(s->state != MSpanInUse) @@ -259,7 +275,9 @@ runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct) static void MHeap_FreeLocked(MHeap *h, MSpan *s) { + uintptr *sp, *tp; MSpan *t; + PageID p; if(s->state != MSpanInUse || s->ref != 0) { // runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref); @@ -267,21 +285,30 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) } s->state = MSpanFree; runtime_MSpanList_Remove(s); + sp = (uintptr*)(s->start<<PageShift); // Coalesce with earlier, later spans. 
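/*
 * [Editor's note, not part of the patch.]  Illustrative sketch of the
 * interior-pointer lookup performed by runtime_MHeap_LookupMaybe above:
 * reject pointers outside the arena, index the flat span map by page number
 * (arena-relative on 64-bit), then verify that the candidate span really
 * covers that page and is in use, since map entries in the middle of free
 * spans may be stale.  Types and names below are stand-ins, assuming 4 kB pages.
 */
#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT 12                      /* assumed PageShift */

typedef struct Span {
	uintptr_t start;                   /* first page number of the span */
	uintptr_t npages;                  /* pages in the span */
	int       in_use;                  /* stand-in for state == MSpanInUse */
} Span;

static Span *
lookup_maybe(Span **map, uintptr_t arena_start, uintptr_t arena_used, void *v)
{
	uintptr_t a = (uintptr_t)v;
	if (a < arena_start || a >= arena_used)
		return NULL;                               /* not a heap pointer */
	uintptr_t p = a >> PAGE_SHIFT;                     /* absolute page number */
	uintptr_t q = p;
	if (sizeof(void *) == 8)
		q -= arena_start >> PAGE_SHIFT;            /* map index is arena-relative */
	Span *s = map[q];
	if (s == NULL || p < s->start || p - s->start >= s->npages || !s->in_use)
		return NULL;                               /* free-span middle: treat as garbage */
	return s;
}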
- if((t = runtime_MHeapMap_Get(&h->map, s->start - 1)) != nil && t->state != MSpanInUse) { + p = s->start; + if(sizeof(void*) == 8) + p -= (uintptr)h->arena_start >> PageShift; + if(p > 0 && (t = h->map[p-1]) != nil && t->state != MSpanInUse) { + tp = (uintptr*)(t->start<<PageShift); + *tp |= *sp; // propagate "needs zeroing" mark s->start = t->start; s->npages += t->npages; - runtime_MHeapMap_Set(&h->map, s->start, s); + p -= t->npages; + h->map[p] = s; runtime_MSpanList_Remove(t); t->state = MSpanDead; runtime_FixAlloc_Free(&h->spanalloc, t); mstats.mspan_inuse = h->spanalloc.inuse; mstats.mspan_sys = h->spanalloc.sys; } - if((t = runtime_MHeapMap_Get(&h->map, s->start + s->npages)) != nil && t->state != MSpanInUse) { + if(p+s->npages < nelem(h->map) && (t = h->map[p+s->npages]) != nil && t->state != MSpanInUse) { + tp = (uintptr*)(t->start<<PageShift); + *sp |= *tp; // propagate "needs zeroing" mark s->npages += t->npages; - runtime_MHeapMap_Set(&h->map, s->start + s->npages - 1, s); + h->map[p + s->npages - 1] = s; runtime_MSpanList_Remove(t); t->state = MSpanDead; runtime_FixAlloc_Free(&h->spanalloc, t); @@ -341,10 +368,14 @@ runtime_MSpanList_IsEmpty(MSpan *list) void runtime_MSpanList_Insert(MSpan *list, MSpan *span) { - if(span->next != nil || span->prev != nil) + if(span->next != nil || span->prev != nil) { + // runtime_printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev); runtime_throw("MSpanList_Insert"); + } span->next = list->next; span->prev = list; span->next->prev = span; span->prev->next = span; } + + diff --git a/libgo/runtime/mheapmap32.c b/libgo/runtime/mheapmap32.c deleted file mode 100644 index 547c602fe3f..00000000000 --- a/libgo/runtime/mheapmap32.c +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Heap map, 32-bit version -// See malloc.h and mheap.c for overview. - -#include "runtime.h" -#include "malloc.h" - -#if __SIZEOF_POINTER__ == 4 - -// 3-level radix tree mapping page ids to Span*. -void -runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr)) -{ - m->allocator = allocator; -} - -MSpan* -runtime_MHeapMap_Get(MHeapMap *m, PageID k) -{ - int32 i1, i2; - - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Get"); - - return m->p[i1]->s[i2]; -} - -MSpan* -runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k) -{ - int32 i1, i2; - MHeapMapNode2 *p2; - - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Get"); - - p2 = m->p[i1]; - if(p2 == nil) - return nil; - return p2->s[i2]; -} - -void -runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *s) -{ - int32 i1, i2; - - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Set"); - - m->p[i1]->s[i2] = s; -} - -// Allocate the storage required for entries [k, k+1, ..., k+len-1] -// so that Get and Set calls need not check for nil pointers. 
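/*
 * [Editor's note, not part of the patch.]  For reference, a sketch of the key
 * split used by the 32-bit MHeapMap being deleted here: a 20-bit page number
 * is cut into a 10-bit first-level index and a 10-bit leaf index, and
 * Preallocate advances the key one whole leaf at a time.  The constants
 * mirror mheapmap32.h; the function name is illustrative.
 */
#include <stdint.h>

enum {
	LEVEL1_BITS = 10, LEVEL2_BITS = 10,
	LEVEL1_MASK = (1 << LEVEL1_BITS) - 1,
	LEVEL2_MASK = (1 << LEVEL2_BITS) - 1
};

static void
split_page_id(uintptr_t k, int *i1, int *i2)
{
	*i2 = k & LEVEL2_MASK;    /* slot in the leaf node */
	k >>= LEVEL2_BITS;
	*i1 = k & LEVEL1_MASK;    /* slot in the first-level array */
	/* any bits still left in k mean the page id is out of range */
}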
-bool -runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr len) -{ - uintptr end; - int32 i1; - MHeapMapNode2 *p2; - - end = k+len; - while(k < end) { - if((k >> MHeapMap_TotalBits) != 0) - return false; - i1 = (k >> MHeapMap_Level2Bits) & MHeapMap_Level1Mask; - - // first-level pointer - if(m->p[i1] == nil) { - p2 = m->allocator(sizeof *p2); - if(p2 == nil) - return false; - mstats.heapmap_sys += sizeof *p2; - m->p[i1] = p2; - } - - // advance key past this leaf node - k = ((k >> MHeapMap_Level2Bits) + 1) << MHeapMap_Level2Bits; - } - return true; -} - -#endif /* __SIZEOF_POINTER__ == 4 */ diff --git a/libgo/runtime/mheapmap32.h b/libgo/runtime/mheapmap32.h deleted file mode 100644 index 2861624690f..00000000000 --- a/libgo/runtime/mheapmap32.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Free(v) must be able to determine the MSpan containing v. -// The MHeapMap is a 2-level radix tree mapping page numbers to MSpans. - -typedef struct MHeapMapNode2 MHeapMapNode2; - -enum -{ - // 32 bit address - 12 bit page size = 20 bits to map - MHeapMap_Level1Bits = 10, - MHeapMap_Level2Bits = 10, - - MHeapMap_TotalBits = - MHeapMap_Level1Bits + - MHeapMap_Level2Bits, - - MHeapMap_Level1Mask = (1<<MHeapMap_Level1Bits) - 1, - MHeapMap_Level2Mask = (1<<MHeapMap_Level2Bits) - 1, -}; - -struct MHeapMap -{ - void *(*allocator)(uintptr); - MHeapMapNode2 *p[1<<MHeapMap_Level1Bits]; -}; - -struct MHeapMapNode2 -{ - MSpan *s[1<<MHeapMap_Level2Bits]; -}; - -void runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr)); -bool runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr npages); -MSpan* runtime_MHeapMap_Get(MHeapMap *m, PageID k); -MSpan* runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k); -void runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *v); - - diff --git a/libgo/runtime/mheapmap64.c b/libgo/runtime/mheapmap64.c deleted file mode 100644 index d6305953ad5..00000000000 --- a/libgo/runtime/mheapmap64.c +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Heap map, 64-bit version -// See malloc.h and mheap.c for overview. - -#include "runtime.h" -#include "malloc.h" - -#if __SIZEOF_POINTER__ == 8 - -// 3-level radix tree mapping page ids to Span*. 
-void -runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr)) -{ - m->allocator = allocator; -} - -MSpan* -runtime_MHeapMap_Get(MHeapMap *m, PageID k) -{ - int32 i1, i2, i3; - - i3 = k & MHeapMap_Level3Mask; - k >>= MHeapMap_Level3Bits; - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Get"); - - return m->p[i1]->p[i2]->s[i3]; -} - -MSpan* -runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k) -{ - int32 i1, i2, i3; - MHeapMapNode2 *p2; - MHeapMapNode3 *p3; - - i3 = k & MHeapMap_Level3Mask; - k >>= MHeapMap_Level3Bits; - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Get"); - - p2 = m->p[i1]; - if(p2 == nil) - return nil; - p3 = p2->p[i2]; - if(p3 == nil) - return nil; - return p3->s[i3]; -} - -void -runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *s) -{ - int32 i1, i2, i3; - - i3 = k & MHeapMap_Level3Mask; - k >>= MHeapMap_Level3Bits; - i2 = k & MHeapMap_Level2Mask; - k >>= MHeapMap_Level2Bits; - i1 = k & MHeapMap_Level1Mask; - k >>= MHeapMap_Level1Bits; - if(k != 0) - runtime_throw("MHeapMap_Set"); - - m->p[i1]->p[i2]->s[i3] = s; -} - -// Allocate the storage required for entries [k, k+1, ..., k+len-1] -// so that Get and Set calls need not check for nil pointers. -bool -runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr len) -{ - uintptr end; - int32 i1, i2; - MHeapMapNode2 *p2; - MHeapMapNode3 *p3; - - end = k+len; - while(k < end) { - if((k >> MHeapMap_TotalBits) != 0) - return false; - i2 = (k >> MHeapMap_Level3Bits) & MHeapMap_Level2Mask; - i1 = (k >> (MHeapMap_Level3Bits + MHeapMap_Level2Bits)) & MHeapMap_Level1Mask; - - // first-level pointer - if((p2 = m->p[i1]) == nil) { - p2 = m->allocator(sizeof *p2); - if(p2 == nil) - return false; - mstats.heapmap_sys += sizeof *p2; - m->p[i1] = p2; - } - - // second-level pointer - if(p2->p[i2] == nil) { - p3 = m->allocator(sizeof *p3); - if(p3 == nil) - return false; - mstats.heapmap_sys += sizeof *p3; - p2->p[i2] = p3; - } - - // advance key past this leaf node - k = ((k >> MHeapMap_Level3Bits) + 1) << MHeapMap_Level3Bits; - } - return true; -} - -#endif /* __SIZEOF_POINTER__ == 8 */ diff --git a/libgo/runtime/mheapmap64.h b/libgo/runtime/mheapmap64.h deleted file mode 100644 index be304cb2e8b..00000000000 --- a/libgo/runtime/mheapmap64.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Free(v) must be able to determine the MSpan containing v. -// The MHeapMap is a 3-level radix tree mapping page numbers to MSpans. -// -// NOTE(rsc): On a 32-bit platform (= 20-bit page numbers), -// we can swap in a 2-level radix tree. -// -// NOTE(rsc): We use a 3-level tree because tcmalloc does, but -// having only three levels requires approximately 1 MB per node -// in the tree, making the minimum map footprint 3 MB. -// Using a 4-level tree would cut the minimum footprint to 256 kB. -// On the other hand, it's just virtual address space: most of -// the memory is never going to be touched, thus never paged in. 
- -typedef struct MHeapMapNode2 MHeapMapNode2; -typedef struct MHeapMapNode3 MHeapMapNode3; - -enum -{ - // 64 bit address - 12 bit page size = 52 bits to map - MHeapMap_Level1Bits = 18, - MHeapMap_Level2Bits = 18, - MHeapMap_Level3Bits = 16, - - MHeapMap_TotalBits = - MHeapMap_Level1Bits + - MHeapMap_Level2Bits + - MHeapMap_Level3Bits, - - MHeapMap_Level1Mask = (1<<MHeapMap_Level1Bits) - 1, - MHeapMap_Level2Mask = (1<<MHeapMap_Level2Bits) - 1, - MHeapMap_Level3Mask = (1<<MHeapMap_Level3Bits) - 1, -}; - -struct MHeapMap -{ - void *(*allocator)(uintptr); - MHeapMapNode2 *p[1<<MHeapMap_Level1Bits]; -}; - -struct MHeapMapNode2 -{ - MHeapMapNode3 *p[1<<MHeapMap_Level2Bits]; -}; - -struct MHeapMapNode3 -{ - MSpan *s[1<<MHeapMap_Level3Bits]; -}; - -void runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr)); -bool runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr npages); -MSpan* runtime_MHeapMap_Get(MHeapMap *m, PageID k); -MSpan* runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k); -void runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *v); - - diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc index 6bd4ef72724..2e147edda02 100644 --- a/libgo/runtime/mprof.goc +++ b/libgo/runtime/mprof.goc @@ -67,7 +67,7 @@ stkbucket(uintptr *stk, int32 nstk) runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0) return b; - b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1); + b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime_memmove(b->stk, stk, nstk*sizeof stk[0]); b->hash = h; @@ -134,7 +134,7 @@ setaddrbucket(uintptr addr, Bucket *b) if(ah->addr == (addr>>20)) goto found; - ah = runtime_mallocgc(sizeof *ah, RefNoProfiling, 0, 1); + ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); addrmem += sizeof *ah; ah->next = addrhash[h]; ah->addr = addr>>20; @@ -142,7 +142,7 @@ setaddrbucket(uintptr addr, Bucket *b) found: if((e = addrfree) == nil) { - e = runtime_mallocgc(64*sizeof *e, RefNoProfiling, 0, 0); + e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c index 8b021a2b6b3..6e82885bab4 100644 --- a/libgo/runtime/msize.c +++ b/libgo/runtime/msize.c @@ -57,7 +57,7 @@ runtime_SizeToClass(int32 size) void runtime_InitSizes(void) { - int32 align, sizeclass, size, osize, nextsize, n; + int32 align, sizeclass, size, nextsize, n; uint32 i; uintptr allocsize, npages; @@ -81,8 +81,7 @@ runtime_InitSizes(void) // the leftover is less than 1/8 of the total, // so wasted space is at most 12.5%. allocsize = PageSize; - osize = size + RefcountOverhead; - while(allocsize%osize > (allocsize/8)) + while(allocsize%size > allocsize/8) allocsize += PageSize; npages = allocsize >> PageShift; @@ -93,7 +92,7 @@ runtime_InitSizes(void) // different sizes. 
if(sizeclass > 1 && (int32)npages == runtime_class_to_allocnpages[sizeclass-1] - && allocsize/osize == allocsize/(runtime_class_to_size[sizeclass-1]+RefcountOverhead)) { + && allocsize/size == allocsize/runtime_class_to_size[sizeclass-1]) { runtime_class_to_size[sizeclass-1] = size; continue; } diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h index 95216e4a5ca..011ba7dab6b 100644 --- a/libgo/runtime/runtime.h +++ b/libgo/runtime/runtime.h @@ -185,6 +185,7 @@ void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64)); #define runtime_mmap mmap #define runtime_munmap(p, s) munmap((p), (s)) #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) +#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) struct __go_func_type; void reflect_call(const struct __go_func_type *, const void *, _Bool, void **, diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc index b5f2954bc8e..7cbd739e51e 100644 --- a/libgo/runtime/sigqueue.goc +++ b/libgo/runtime/sigqueue.goc @@ -102,7 +102,7 @@ func Signame(sig int32) (name String) { s = buf; } int32 len = __builtin_strlen(s); - unsigned char *data = runtime_mallocgc(len, RefNoPointers, 0, 0); + unsigned char *data = runtime_mallocgc(len, FlagNoPointers, 0, 0); __builtin_memcpy(data, s, len); name.__data = data; name.__length = len; |
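[Editor's note, not part of the patch.]  The msize.c hunks above drop the
per-object RefcountOverhead term from the size-class search; the search
itself is unchanged.  A minimal sketch of that search follows, with an
assumed 4 kB page size and an illustrative function name: the allocation
size for a class grows a page at a time until the leftover after cutting it
into objects is no more than 1/8 of the total, i.e. at most 12.5% waste.

#include <stdint.h>

#define PAGE_SIZE 4096   /* assumed PageSize */

static uintptr_t
class_alloc_size(int32_t size)
{
	uintptr_t allocsize = PAGE_SIZE;
	/* grow until the unusable tail is at most 1/8 of the allocation */
	while (allocsize % (uintptr_t)size > allocsize / 8)
		allocsize += PAGE_SIZE;
	return allocsize;
}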