Diffstat (limited to 'libgo/runtime')
-rw-r--r--  libgo/runtime/go-byte-array-to-string.c  |   2
-rw-r--r--  libgo/runtime/go-int-array-to-string.c   |   2
-rw-r--r--  libgo/runtime/go-int-to-string.c         |   2
-rw-r--r--  libgo/runtime/go-new.c                   |   2
-rw-r--r--  libgo/runtime/go-panic.c                 |   2
-rw-r--r--  libgo/runtime/go-string-to-byte-array.c  |   2
-rw-r--r--  libgo/runtime/go-string-to-int-array.c   |   2
-rw-r--r--  libgo/runtime/go-strplus.c               |   2
-rw-r--r--  libgo/runtime/go-type.h                  |   4
-rw-r--r--  libgo/runtime/go-unsafe-pointer.c        |   4
-rw-r--r--  libgo/runtime/malloc.goc                 | 216
-rw-r--r--  libgo/runtime/malloc.h                   |  85
-rw-r--r--  libgo/runtime/mcentral.c                 |  28
-rw-r--r--  libgo/runtime/mem.c                      |  72
-rw-r--r--  libgo/runtime/mem_posix_memalign.c       |   8
-rw-r--r--  libgo/runtime/mfinal.c                   |  17
-rw-r--r--  libgo/runtime/mgc0.c                     | 771
-rw-r--r--  libgo/runtime/mheap.c                    | 103
-rw-r--r--  libgo/runtime/mheapmap32.c               |  99
-rw-r--r--  libgo/runtime/mheapmap32.h               |  41
-rw-r--r--  libgo/runtime/mheapmap64.c               | 120
-rw-r--r--  libgo/runtime/mheapmap64.h               |  60
-rw-r--r--  libgo/runtime/mprof.goc                  |   6
-rw-r--r--  libgo/runtime/msize.c                    |   7
-rw-r--r--  libgo/runtime/runtime.h                  |   1
-rw-r--r--  libgo/runtime/sigqueue.goc               |   2
26 files changed, 982 insertions, 678 deletions
diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c
index 531730654d0..1b9ac2d7964 100644
--- a/libgo/runtime/go-byte-array-to-string.c
+++ b/libgo/runtime/go-byte-array-to-string.c
@@ -16,7 +16,7 @@ __go_byte_array_to_string (const void* p, size_t len)
struct __go_string ret;
bytes = (const unsigned char *) p;
- retdata = runtime_mallocgc (len, RefNoPointers, 1, 0);
+ retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0);
__builtin_memcpy (retdata, bytes, len);
ret.__data = retdata;
ret.__length = len;
diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c
index 46a33dafc2e..c16589f01a2 100644
--- a/libgo/runtime/go-int-array-to-string.c
+++ b/libgo/runtime/go-int-array-to-string.c
@@ -41,7 +41,7 @@ __go_int_array_to_string (const void* p, size_t len)
slen += 4;
}
- retdata = runtime_mallocgc (slen, RefNoPointers, 1, 0);
+ retdata = runtime_mallocgc (slen, FlagNoPointers, 1, 0);
ret.__data = retdata;
ret.__length = slen;
diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c
index 24d729cf893..af58015ed8f 100644
--- a/libgo/runtime/go-int-to-string.c
+++ b/libgo/runtime/go-int-to-string.c
@@ -51,7 +51,7 @@ __go_int_to_string (int v)
}
}
- retdata = runtime_mallocgc (len, RefNoPointers, 1, 0);
+ retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0);
__builtin_memcpy (retdata, buf, len);
ret.__data = retdata;
ret.__length = len;
diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c
index a592174e50f..8f25c5730ef 100644
--- a/libgo/runtime/go-new.c
+++ b/libgo/runtime/go-new.c
@@ -17,5 +17,5 @@ __go_new (size_t size)
void *
__go_new_nopointers (size_t size)
{
- return runtime_mallocgc (size, RefNoPointers, 1, 1);
+ return runtime_mallocgc (size, FlagNoPointers, 1, 1);
}
diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c
index 48d64416297..b684779cda8 100644
--- a/libgo/runtime/go-panic.c
+++ b/libgo/runtime/go-panic.c
@@ -112,7 +112,7 @@ __go_panic_msg (const char* msg)
struct __go_empty_interface arg;
len = __builtin_strlen (msg);
- sdata = runtime_mallocgc (len, RefNoPointers, 0, 0);
+ sdata = runtime_mallocgc (len, FlagNoPointers, 0, 0);
__builtin_memcpy (sdata, msg, len);
s.__data = sdata;
s.__length = len;
diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c
index 3b646c81abe..10c565e39a5 100644
--- a/libgo/runtime/go-string-to-byte-array.c
+++ b/libgo/runtime/go-string-to-byte-array.c
@@ -15,7 +15,7 @@ __go_string_to_byte_array (struct __go_string str)
unsigned char *data;
struct __go_open_array ret;
- data = (unsigned char *) runtime_mallocgc (str.__length, RefNoPointers, 1, 0);
+ data = (unsigned char *) runtime_mallocgc (str.__length, FlagNoPointers, 1, 0);
__builtin_memcpy (data, str.__data, str.__length);
ret.__values = (void *) data;
ret.__count = str.__length;
diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c
index 8d7f94f93ab..f59df6739f1 100644
--- a/libgo/runtime/go-string-to-int-array.c
+++ b/libgo/runtime/go-string-to-int-array.c
@@ -31,7 +31,7 @@ __go_string_to_int_array (struct __go_string str)
p += __go_get_rune (p, pend - p, &rune);
}
- data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), RefNoPointers,
+ data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), FlagNoPointers,
1, 0);
p = str.__data;
pd = data;
diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c
index c0cd356ca63..e4dea9c4690 100644
--- a/libgo/runtime/go-strplus.c
+++ b/libgo/runtime/go-strplus.c
@@ -21,7 +21,7 @@ __go_string_plus (struct __go_string s1, struct __go_string s2)
return s1;
len = s1.__length + s2.__length;
- retdata = runtime_mallocgc (len, RefNoPointers, 1, 0);
+ retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0);
__builtin_memcpy (retdata, s1.__data, s1.__length);
__builtin_memcpy (retdata + s1.__length, s2.__data, s2.__length);
ret.__data = retdata;
diff --git a/libgo/runtime/go-type.h b/libgo/runtime/go-type.h
index b1f32850a00..76681217fdf 100644
--- a/libgo/runtime/go-type.h
+++ b/libgo/runtime/go-type.h
@@ -94,6 +94,10 @@ struct __go_type_descriptor
/* A pointer to fields which are only used for some types. */
const struct __go_uncommon_type *__uncommon;
+
+ /* The descriptor for the type which is a pointer to this type.
+ This may be NULL. */
+ const struct __go_type_descriptor *__pointer_to_this;
};
/* The information we store for each method of a type. */
diff --git a/libgo/runtime/go-unsafe-pointer.c b/libgo/runtime/go-unsafe-pointer.c
index 804360f8a89..9ec18003f1f 100644
--- a/libgo/runtime/go-unsafe-pointer.c
+++ b/libgo/runtime/go-unsafe-pointer.c
@@ -51,6 +51,8 @@ const struct __go_type_descriptor unsafe_Pointer =
/* __reflection */
&reflection_string,
/* __uncommon */
+ NULL,
+ /* __pointer_to_this */
NULL
};
@@ -90,6 +92,8 @@ const struct __go_ptr_type pointer_unsafe_Pointer =
/* __reflection */
&preflection_string,
/* __uncommon */
+ NULL,
+ /* __pointer_to_this */
NULL
},
/* __element_type */
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index d826d479f5c..5f99c4b73a4 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -45,14 +45,13 @@ fastrand1(void)
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
void*
-runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
+runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
{
int32 sizeclass, rate;
MCache *c;
uintptr npages;
MSpan *s;
void *v;
- uint32 *ref;
if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
runtime_throw("malloc/free - deadlock");
@@ -71,12 +70,6 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
mstats.alloc += size;
mstats.total_alloc += size;
mstats.by_size[sizeclass].nmalloc++;
-
- if(!runtime_mlookup(v, nil, nil, nil, &ref)) {
- // runtime_printf("malloc %D; runtime_mlookup failed\n", (uint64)size);
- runtime_throw("malloc runtime_mlookup");
- }
- *ref = RefNone | refflag;
} else {
// TODO(rsc): Report tracebacks for very large allocations.
@@ -93,14 +86,15 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
v = (void*)(s->start << PageShift);
// setup for mark sweep
- s->gcref0 = RefNone | refflag;
- ref = &s->gcref0;
+ runtime_markspan(v, 0, 0, true);
}
+ if(!(flag & FlagNoGC))
+ runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);
__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing, 1, 0)) {
- if(!(refflag & RefNoProfiling))
+ if(!(flag & FlagNoProfiling))
__go_run_goroutine_gc(0);
else {
// We are being called from the profiler. Tell it
@@ -110,7 +104,7 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
}
}
- if(!(refflag & RefNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
+ if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
if(size >= (uint32) rate)
goto profile;
if((uint32) m->mcache->next_sample > size)
@@ -121,7 +115,7 @@ runtime_mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
rate = 0x3fffffff;
m->mcache->next_sample = fastrand1() % (2*rate);
profile:
- *ref |= RefProfiled;
+ runtime_setblockspecial(v);
runtime_MProf_Malloc(v, size);
}
}
@@ -141,32 +135,37 @@ __go_alloc(uintptr size)
void
__go_free(void *v)
{
- int32 sizeclass, size;
+ int32 sizeclass;
MSpan *s;
MCache *c;
- uint32 prof, *ref;
+ uint32 prof;
+ uintptr size;
if(v == nil)
return;
+
+ // If you change this also change mgc0.c:/^sweepspan,
+ // which has a copy of the guts of free.
if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
runtime_throw("malloc/free - deadlock");
- if(!runtime_mlookup(v, nil, nil, &s, &ref)) {
+ if(!runtime_mlookup(v, nil, nil, &s)) {
// runtime_printf("free %p: not an allocated block\n", v);
runtime_throw("free runtime_mlookup");
}
- prof = *ref & RefProfiled;
- *ref = RefFree;
+ prof = runtime_blockspecial(v);
// Find size class for v.
sizeclass = s->sizeclass;
if(sizeclass == 0) {
// Large object.
- if(prof)
- runtime_MProf_Free(v, s->npages<<PageShift);
- mstats.alloc -= s->npages<<PageShift;
- runtime_memclr(v, s->npages<<PageShift);
+ size = s->npages<<PageShift;
+ *(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed"
+ // Must mark v freed before calling unmarkspan and MHeap_Free:
+ // they might coalesce v into other spans and change the bitmap further.
+ runtime_markfreed(v, size);
+ runtime_unmarkspan(v, 1<<PageShift);
runtime_MHeap_Free(&runtime_mheap, s, 1);
} else {
// Small object.
@@ -174,12 +173,17 @@ __go_free(void *v)
size = runtime_class_to_size[sizeclass];
if(size > (int32)sizeof(uintptr))
((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
- if(prof)
- runtime_MProf_Free(v, size);
- mstats.alloc -= size;
+ // Must mark v freed before calling MCache_Free:
+ // it might coalesce v and other blocks into a bigger span
+ // and change the bitmap further.
+ runtime_markfreed(v, size);
mstats.by_size[sizeclass].nfree++;
runtime_MCache_Free(c, v, sizeclass, size);
}
+ mstats.alloc -= size;
+ if(prof)
+ runtime_MProf_Free(v, size);
+
__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
@@ -187,23 +191,22 @@ __go_free(void *v)
}
int32
-runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
+runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
{
- uintptr n, nobj, i;
+ uintptr n, i;
byte *p;
MSpan *s;
mstats.nlookup++;
- s = runtime_MHeap_LookupMaybe(&runtime_mheap, (uintptr)v>>PageShift);
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
if(sp)
*sp = s;
if(s == nil) {
+ runtime_checkfreed(v, 1);
if(base)
*base = nil;
if(size)
*size = 0;
- if(ref)
- *ref = 0;
return 0;
}
@@ -214,14 +217,11 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
*base = p;
if(size)
*size = s->npages<<PageShift;
- if(ref)
- *ref = &s->gcref0;
return 1;
}
- if((byte*)v >= (byte*)s->gcref) {
- // pointers into the gc ref counts
- // do not count as pointers.
+ if((byte*)v >= (byte*)s->limit) {
+ // pointers past the last block do not count as pointers.
return 0;
}
@@ -232,21 +232,6 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
if(size)
*size = n;
- // good for error checking, but expensive
- if(0) {
- nobj = (s->npages << PageShift) / (n + RefcountOverhead);
- if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) {
- // runtime_printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n",
- // s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages);
- // runtime_printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n",
- // s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift,
- // (uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift));
- runtime_throw("bad gcref");
- }
- }
- if(ref)
- *ref = &s->gcref[i];
-
return 1;
}
@@ -278,16 +263,90 @@ runtime_allocmcache(void)
extern int32 runtime_sizeof_C_MStats
__asm__ ("libgo_runtime.runtime.Sizeof_C_MStats");
+#define MaxArena32 (2U<<30)
+
void
runtime_mallocinit(void)
{
- runtime_sizeof_C_MStats = sizeof(MStats);
+ byte *p;
+ uintptr arena_size, bitmap_size;
+ extern byte end[];
- runtime_initfintab();
- runtime_Mprof_Init();
+ runtime_sizeof_C_MStats = sizeof(MStats);
- runtime_SysMemInit();
runtime_InitSizes();
+
+ // Set up the allocation arena, a contiguous area of memory where
+ // allocated data will be found. The arena begins with a bitmap large
+ // enough to hold 4 bits per allocated word.
+ if(sizeof(void*) == 8) {
+ // On a 64-bit machine, allocate from a single contiguous reservation.
+ // 16 GB should be big enough for now.
+ //
+ // The code will work with the reservation at any address, but ask
+ // SysReserve to use 0x000000f800000000 if possible.
+ // Allocating a 16 GB region takes away 36 bits, and the amd64
+ // doesn't let us choose the top 17 bits, so that leaves the 11 bits
+ // in the middle of 0x00f8 for us to choose. Choosing 0x00f8 means
+ // that the valid memory addresses will begin 0x00f8, 0x00f9, 0x00fa, 0x00fb.
+ // None of the bytes f8 f9 fa fb can appear in valid UTF-8, and
+ // they are otherwise as far from ff (likely a common byte) as possible.
+ // Choosing 0x00 for the leading 6 bits was more arbitrary, but it
+ // is not a common ASCII code point either. Using 0x11f8 instead
+ // caused out of memory errors on OS X during thread allocations.
+ // These choices are both for debuggability and to reduce the
+ // odds of the conservative garbage collector not collecting memory
+ // because some non-pointer block of memory had a bit pattern
+ // that matched a memory address.
+ //
+ // Actually we reserve 17 GB (because the bitmap ends up being 1 GB)
+ // but it hardly matters: fc is not valid UTF-8 either, and we have to
+ // allocate 15 GB before we get that far.
+ arena_size = (uintptr)(16LL<<30);
+ bitmap_size = arena_size / (sizeof(void*)*8/4);
+ p = runtime_SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size);
+ if(p == nil)
+ runtime_throw("runtime: cannot reserve arena virtual address space");
+ } else {
+ // On a 32-bit machine, we can't typically get away
+ // with a giant virtual address space reservation.
+ // Instead we map the memory information bitmap
+ // immediately after the data segment, large enough
+ // to handle another 2GB of mappings (256 MB),
+ // along with a reservation for another 512 MB of memory.
+ // When that gets used up, we'll start asking the kernel
+ // for any memory anywhere and hope it's in the 2GB
+ // following the bitmap (presumably the executable begins
+ // near the bottom of memory, so we'll have to use up
+ // most of memory before the kernel resorts to giving out
+ // memory before the beginning of the text segment).
+ //
+ // Alternatively we could reserve 512 MB bitmap, enough
+ // for 4GB of mappings, and then accept any memory the
+ // kernel threw at us, but normally that's a waste of 512 MB
+ // of address space, which is probably too much in a 32-bit world.
+ bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
+ arena_size = 512<<20;
+
+ // SysReserve treats the address we ask for, end, as a hint,
+ // not as an absolute requirement. If we ask for the end
+ // of the data segment but the operating system requires
+ // a little more space before we can start allocating, it will
+ // give out a slightly higher pointer. That's fine.
+ // Run with what we get back.
+ p = runtime_SysReserve(end, bitmap_size + arena_size);
+ if(p == nil)
+ runtime_throw("runtime: cannot reserve arena virtual address space");
+ }
+ if((uintptr)p & (((uintptr)1<<PageShift)-1))
+ runtime_throw("runtime: SysReserve returned unaligned address");
+
+ runtime_mheap.bitmap = p;
+ runtime_mheap.arena_start = p + bitmap_size;
+ runtime_mheap.arena_used = runtime_mheap.arena_start;
+ runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size;
+
+ // Initialize the rest of the allocator.
runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
m->mcache = runtime_allocmcache();
@@ -295,6 +354,47 @@ runtime_mallocinit(void)
runtime_free(runtime_malloc(1));
}
+void*
+runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
+{
+ byte *p;
+
+ if(n <= (uintptr)(h->arena_end - h->arena_used)) {
+ // Keep taking from our reservation.
+ p = h->arena_used;
+ runtime_SysMap(p, n);
+ h->arena_used += n;
+ runtime_MHeap_MapBits(h);
+ return p;
+ }
+
+ // On 64-bit, our reservation is all we have.
+ if(sizeof(void*) == 8)
+ return nil;
+
+ // On 32-bit, once the reservation is gone we can
+ // try to get memory at a location chosen by the OS
+ // and hope that it is in the range we allocated bitmap for.
+ p = runtime_SysAlloc(n);
+ if(p == nil)
+ return nil;
+
+ if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) {
+ runtime_printf("runtime: memory allocated by OS not in usable range");
+ runtime_SysFree(p, n);
+ return nil;
+ }
+
+ if(p+n > h->arena_used) {
+ h->arena_used = p+n;
+ if(h->arena_used > h->arena_end)
+ h->arena_end = h->arena_used;
+ runtime_MHeap_MapBits(h);
+ }
+
+ return p;
+}
+
// Runtime stubs.
void*
@@ -303,6 +403,10 @@ runtime_mal(uintptr n)
return runtime_mallocgc(n, 0, 1, 1);
}
+func new(n uint32) (ret *uint8) {
+ ret = runtime_mal(n);
+}
+
func Alloc(n uintptr) (p *byte) {
p = runtime_malloc(n);
}
@@ -312,7 +416,7 @@ func Free(p *byte) {
}
func Lookup(p *byte) (base *byte, size uintptr) {
- runtime_mlookup(p, &base, &size, nil, nil);
+ runtime_mlookup(p, &base, &size, nil);
}
func GC() {
@@ -333,7 +437,7 @@ func SetFinalizer(obj Eface, finalizer Eface) {
// runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string);
goto throw;
}
- if(!runtime_mlookup(obj.__object, &base, &size, nil, nil) || obj.__object != base) {
+ if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
// runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
goto throw;
}
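
For orientation, here is a minimal standalone sketch of the arena/bitmap sizing arithmetic that the new runtime_mallocinit above performs before calling SysReserve. It is illustrative only, not runtime code; MaxArena32 mirrors the definition added to malloc.goc, and the printed sizes match the 17 GB (16 GB arena + 1 GB bitmap) reservation described in the comment.

#include <stdio.h>
#include <stdint.h>

#define MaxArena32 (2ULL<<30)

int main(void) {
	uint64_t arena_size, bitmap_size;

	if (sizeof(void*) == 8) {
		arena_size  = 16ULL << 30;                      /* 16 GB arena */
		bitmap_size = arena_size / (sizeof(void*)*8/4); /* 4 bits per word -> 1 GB */
	} else {
		bitmap_size = MaxArena32 / (sizeof(void*)*8/4); /* bitmap for 2 GB -> 256 MB */
		arena_size  = 512ULL << 20;                     /* initial 512 MB reservation */
	}
	printf("arena %llu MB, bitmap %llu MB, reserve %llu MB\n",
	       (unsigned long long)(arena_size >> 20),
	       (unsigned long long)(bitmap_size >> 20),
	       (unsigned long long)((arena_size + bitmap_size) >> 20));
	return 0;
}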
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index 369f9b8e771..8131e964e49 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -19,7 +19,6 @@
// used to manage storage used by the allocator.
// MHeap: the malloc heap, managed at page (4096-byte) granularity.
// MSpan: a run of pages managed by the MHeap.
-// MHeapMap: a mapping from page IDs to MSpans.
// MCentral: a shared free list for a given size class.
// MCache: a per-thread (in Go, per-M) cache for small objects.
// MStats: allocation statistics.
@@ -84,7 +83,6 @@
typedef struct FixAlloc FixAlloc;
typedef struct MCentral MCentral;
typedef struct MHeap MHeap;
-typedef struct MHeapMap MHeapMap;
typedef struct MSpan MSpan;
typedef struct MStats MStats;
typedef struct MLink MLink;
@@ -99,8 +97,14 @@ typedef uintptr PageID; // address >> PageShift
enum
{
+ // Computed constant. The definition of MaxSmallSize and the
+ // algorithm in msize.c produce some number of different allocation
+ // size classes. NumSizeClasses is that number. It's needed here
+ // because there are static arrays of this length; when msize runs its
+ // size choosing algorithm it double-checks that NumSizeClasses agrees.
+ NumSizeClasses = 61,
+
// Tunable constants.
- NumSizeClasses = 67, // Number of size classes (must match msize.c)
MaxSmallSize = 32<<10,
FixAllocChunk = 128<<10, // Chunk size for FixAlloc
@@ -108,13 +112,16 @@ enum
MaxMCacheSize = 2<<20, // Maximum bytes in one MCache
MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
HeapAllocChunk = 1<<20, // Chunk size for heap growth
-};
+ // Number of bits in page to span calculations (4k pages).
+ // On 64-bit, we limit the arena to 16G, so 22 bits suffices.
+ // On 32-bit, we don't bother limiting anything: 20 bits for 4G.
#if __SIZEOF_POINTER__ == 8
-#include "mheapmap64.h"
+ MHeapMap_Bits = 22,
#else
-#include "mheapmap32.h"
+ MHeapMap_Bits = 20,
#endif
+};
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
struct MLink
@@ -124,7 +131,8 @@ struct MLink
// SysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
-// or a megabyte.
+// or a megabyte. If the pointer argument is non-nil, the caller
+// wants a mapping there or nowhere.
//
// SysUnused notifies the operating system that the contents
// of the memory region are no longer needed and can be reused
@@ -134,11 +142,19 @@ struct MLink
// SysFree returns it unconditionally; this is only used if
// an out-of-memory error has been detected midway through
// an allocation. It is okay if SysFree is a no-op.
+//
+// SysReserve reserves address space without allocating memory.
+// If the pointer passed to it is non-nil, the caller wants the
+// reservation there, but SysReserve can still choose another
+// location if that one is unavailable.
+//
+// SysMap maps previously reserved address space for use.
void* runtime_SysAlloc(uintptr nbytes);
void runtime_SysFree(void *v, uintptr nbytes);
void runtime_SysUnused(void *v, uintptr nbytes);
-void runtime_SysMemInit(void);
+void runtime_SysMap(void *v, uintptr nbytes);
+void* runtime_SysReserve(void *v, uintptr nbytes);
// FixAlloc is a simple free-list allocator for fixed size objects.
// Malloc uses a FixAlloc wrapped around SysAlloc to manages its
@@ -194,7 +210,6 @@ struct MStats
uint64 mspan_sys;
uint64 mcache_inuse; // MCache structures
uint64 mcache_sys;
- uint64 heapmap_sys; // heap map
uint64 buckhash_sys; // profiling bucket hash table
// Statistics about garbage collector.
@@ -281,10 +296,7 @@ struct MSpan
uint32 ref; // number of allocated objects in this span
uint32 sizeclass; // size class
uint32 state; // MSpanInUse etc
- union {
- uint32 *gcref; // sizeclass > 0
- uint32 gcref0; // sizeclass == 0
- };
+ byte *limit; // end of data in span
};
void runtime_MSpan_Init(MSpan *span, PageID start, uintptr npages);
@@ -323,11 +335,14 @@ struct MHeap
MSpan *allspans;
// span lookup
- MHeapMap map;
+ MSpan *map[1<<MHeapMap_Bits];
// range of addresses we might see in the heap
- byte *min;
- byte *max;
+ byte *bitmap;
+ uintptr bitmap_mapped;
+ byte *arena_start;
+ byte *arena_used;
+ byte *arena_end;
// central free lists for small size classes.
// the union makes sure that the MCentrals are
@@ -346,31 +361,31 @@ extern MHeap runtime_mheap;
void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr));
MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct);
void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct);
-MSpan* runtime_MHeap_Lookup(MHeap *h, PageID p);
-MSpan* runtime_MHeap_LookupMaybe(MHeap *h, PageID p);
-void runtime_MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj);
+MSpan* runtime_MHeap_Lookup(MHeap *h, void *v);
+MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v);
+void runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
+void* runtime_MHeap_SysAlloc(MHeap *h, uintptr n);
+void runtime_MHeap_MapBits(MHeap *h);
void* runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
-int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref);
+int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s);
void runtime_gc(int32 force);
-
-void* runtime_SysAlloc(uintptr);
-void runtime_SysUnused(void*, uintptr);
-void runtime_SysFree(void*, uintptr);
+void runtime_markallocated(void *v, uintptr n, bool noptr);
+void runtime_checkallocated(void *v, uintptr n);
+void runtime_markfreed(void *v, uintptr n);
+void runtime_checkfreed(void *v, uintptr n);
+int32 runtime_checking;
+void runtime_markspan(void *v, uintptr size, uintptr n, bool leftover);
+void runtime_unmarkspan(void *v, uintptr size);
+bool runtime_blockspecial(void*);
+void runtime_setblockspecial(void*);
enum
{
- RefcountOverhead = 4, // one uint32 per object
-
- RefFree = 0, // must be zero
- RefStack, // stack segment - don't free and don't scan for pointers
- RefNone, // no references
- RefSome, // some references
- RefNoPointers = 0x80000000U, // flag - no pointers here
- RefHasFinalizer = 0x40000000U, // flag - has finalizer
- RefProfiled = 0x20000000U, // flag - is in profiling table
- RefNoProfiling = 0x10000000U, // flag - must not profile
- RefFlags = 0xFFFF0000U,
+ // flags to malloc
+ FlagNoPointers = 1<<0, // no pointers here
+ FlagNoProfiling = 1<<1, // must not profile
+ FlagNoGC = 1<<2, // must not free or scan for pointers
};
void runtime_Mprof_Init(void);
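
The new MHeapMap_Bits constants replace the separate mheapmap32/mheapmap64 structures with a flat MSpan* map indexed by page number. A minimal sketch of that index computation, assuming the 4096-byte pages (PageShift 12) noted at the top of the header; on 64-bit the index is arena-relative, so 22 bits cover the 16 GB arena, while on 32-bit 20 bits cover the full 4 GB address space.

#include <stdint.h>

enum { PageShift = 12 };   /* assumption: 4096-byte pages, as noted above */

/* Index into MSpan *map[1<<MHeapMap_Bits] for an address v in the arena. */
static uintptr_t
page_index(uintptr_t v, uintptr_t arena_start)
{
	uintptr_t p = v >> PageShift;
	if (sizeof(void*) == 8)
		p -= arena_start >> PageShift;
	return p;
}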
diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c
index 81e54b07dbf..cd3d6ca5ede 100644
--- a/libgo/runtime/mcentral.c
+++ b/libgo/runtime/mcentral.c
@@ -114,13 +114,11 @@ static void
MCentral_Free(MCentral *c, void *v)
{
MSpan *s;
- PageID page;
- MLink *p, *next;
+ MLink *p;
int32 size;
// Find span for v.
- page = (uintptr)v >> PageShift;
- s = runtime_MHeap_Lookup(&runtime_mheap, page);
+ s = runtime_MHeap_Lookup(&runtime_mheap, v);
if(s == nil || s->ref == 0)
runtime_throw("invalid free");
@@ -140,16 +138,8 @@ MCentral_Free(MCentral *c, void *v)
if(--s->ref == 0) {
size = runtime_class_to_size[c->sizeclass];
runtime_MSpanList_Remove(s);
- // The second word of each freed block indicates
- // whether it needs to be zeroed. The first word
- // is the link pointer and must always be cleared.
- for(p=s->freelist; p; p=next) {
- next = p->next;
- if(size > (int32)sizeof(uintptr) && ((uintptr*)p)[1] != 0)
- runtime_memclr((byte*)p, size);
- else
- p->next = nil;
- }
+ runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
@@ -159,7 +149,7 @@ MCentral_Free(MCentral *c, void *v)
}
void
-runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
+runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
{
int32 size;
int32 npages;
@@ -168,7 +158,7 @@ runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *
size = runtime_class_to_size[sizeclass];
*npagesp = npages;
*sizep = size;
- *nobj = (npages << PageShift) / (size + RefcountOverhead);
+ *nobj = (npages << PageShift) / size;
}
// Fetch a new span from the heap and
@@ -176,7 +166,8 @@ runtime_MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *
static bool
MCentral_Grow(MCentral *c)
{
- int32 i, n, npages, size;
+ int32 i, n, npages;
+ uintptr size;
MLink **tailp, *v;
byte *p;
MSpan *s;
@@ -193,7 +184,7 @@ MCentral_Grow(MCentral *c)
// Carve span into sequence of blocks.
tailp = &s->freelist;
p = (byte*)(s->start << PageShift);
- s->gcref = (uint32*)(p + size*n);
+ s->limit = p + size*n;
for(i=0; i<n; i++) {
v = (MLink*)p;
*tailp = v;
@@ -201,6 +192,7 @@ MCentral_Grow(MCentral *c)
p += size;
}
*tailp = nil;
+ runtime_markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
runtime_lock(c);
c->nfree += n;
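
With the per-object refcount word gone, MCentral_Grow now carves a span into n objects of the given size laid out back to back, ending at s->limit. A standalone sketch of that carving loop, mirroring the code above:

#include <stdint.h>
#include <stddef.h>

typedef struct MLink MLink;
struct MLink { MLink *next; };

/* Split a span starting at p into n blocks of 'size' bytes and thread
   them onto a singly linked free list. */
static MLink*
carve_span(unsigned char *p, uintptr_t size, int32_t n)
{
	MLink *head = NULL, **tailp = &head;
	int32_t i;

	for (i = 0; i < n; i++) {
		MLink *v = (MLink*)p;
		*tailp = v;
		tailp = &v->next;
		p += size;
	}
	*tailp = NULL;
	return head;
}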
diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c
index 4d6c7420907..f62a4d37ee7 100644
--- a/libgo/runtime/mem.c
+++ b/libgo/runtime/mem.c
@@ -38,12 +38,11 @@ runtime_SysAlloc(uintptr n)
p = runtime_mmap(nil, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0);
if (p == MAP_FAILED) {
if(errno == EACCES) {
- printf("mmap: access denied\n");
- printf("If you're running SELinux, enable execmem for this process.\n");
- } else {
- printf("mmap: errno=%d\n", errno);
+ printf("runtime: mmap: access denied\n");
+ printf("if you're running SELinux, enable execmem for this process.\n");
+ exit(2);
}
- exit(2);
+ return nil;
}
return p;
}
@@ -63,14 +62,61 @@ runtime_SysFree(void *v, uintptr n)
runtime_munmap(v, n);
}
+void*
+runtime_SysReserve(void *v, uintptr n)
+{
+ int fd = -1;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8)
+ return v;
+
+#ifdef USE_DEV_ZERO
+ if (dev_zero == -1) {
+ dev_zero = open("/dev/zero", O_RDONLY);
+ if (dev_zero < 0) {
+ printf("open /dev/zero: errno=%d\n", errno);
+ exit(2);
+ }
+ }
+ fd = dev_zero;
+#endif
+
+ return runtime_mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, fd, 0);
+}
+
void
-runtime_SysMemInit(void)
+runtime_SysMap(void *v, uintptr n)
{
- // Code generators assume that references to addresses
- // on the first page will fault. Map the page explicitly with
- // no permissions, to head off possible bugs like the system
- // allocating that page as the virtual address space fills.
- // Ignore any error, since other systems might be smart
- // enough to never allow anything there.
- runtime_mmap(nil, 4096, PROT_NONE, MAP_FIXED|MAP_ANON|MAP_PRIVATE, -1, 0);
+ void *p;
+ int fd = -1;
+
+ mstats.sys += n;
+
+#ifdef USE_DEV_ZERO
+ if (dev_zero == -1) {
+ dev_zero = open("/dev/zero", O_RDONLY);
+ if (dev_zero < 0) {
+ printf("open /dev/zero: errno=%d\n", errno);
+ exit(2);
+ }
+ }
+ fd = dev_zero;
+#endif
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(sizeof(void*) == 8) {
+ p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, fd, 0);
+ if(p != v) {
+ runtime_printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime_throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime_mmap(v, n, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_FIXED|MAP_PRIVATE, fd, 0);
+ if(p != v)
+ runtime_throw("runtime: cannot map pages in arena address space");
}
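
Outside the runtime, the reserve-then-commit pattern that SysReserve and SysMap implement above looks roughly like the sketch below (error handling, the /dev/zero fallback, and PROT_EXEC omitted): reserve address space with a PROT_NONE mapping, then commit pages by mapping over the reservation with MAP_FIXED.

#include <sys/mman.h>
#include <stddef.h>

/* Reserve address space without committing memory. */
static void*
reserve(void *hint, size_t n)
{
	void *p = mmap(hint, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
	return p == MAP_FAILED ? NULL : p;
}

/* Commit previously reserved pages by mapping over them with MAP_FIXED. */
static int
commit(void *v, size_t n)
{
	void *p = mmap(v, n, PROT_READ|PROT_WRITE,
	               MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
	return p == v ? 0 : -1;
}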
diff --git a/libgo/runtime/mem_posix_memalign.c b/libgo/runtime/mem_posix_memalign.c
index 3855dfcf185..2318be8da11 100644
--- a/libgo/runtime/mem_posix_memalign.c
+++ b/libgo/runtime/mem_posix_memalign.c
@@ -32,7 +32,13 @@ runtime_SysFree(void *v, uintptr n)
free(v);
}
+void*
+runtime_SysReserve(void *v, uintptr n)
+{
+ return runtime_SysAlloc(n);
+}
+
void
-runtime_SysMemInit(void)
+runtime_SysMap(void *v, uintptr n)
{
}
diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c
index 23c0d7a1663..04d58dddda9 100644
--- a/libgo/runtime/mfinal.c
+++ b/libgo/runtime/mfinal.c
@@ -5,6 +5,9 @@
#include "runtime.h"
#include "malloc.h"
+// Lock to protect finalizer data structures.
+// Cannot reuse mheap.Lock because the finalizer
+// maintenance requires allocation.
static Lock finlock;
void
@@ -95,7 +98,6 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
{
Fintab newtab;
int32 i;
- uint32 *ref;
byte *base;
Finalizer *e;
@@ -110,25 +112,22 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
runtime_throw("finalizer deadlock");
runtime_lock(&finlock);
- if(!runtime_mlookup(p, &base, nil, nil, &ref) || p != base) {
+ if(!runtime_mlookup(p, &base, nil, nil) || p != base) {
runtime_unlock(&finlock);
__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
runtime_throw("addfinalizer on invalid pointer");
}
if(f == nil) {
- if(*ref & RefHasFinalizer) {
- lookfintab(&fintab, p, 1);
- *ref &= ~RefHasFinalizer;
- }
+ lookfintab(&fintab, p, 1);
goto unlock;
}
- if(*ref & RefHasFinalizer) {
+ if(lookfintab(&fintab, p, 0)) {
runtime_unlock(&finlock);
__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
runtime_throw("double finalizer");
}
- *ref |= RefHasFinalizer;
+ runtime_setblockspecial(p);
if(fintab.nkey >= fintab.max/2+fintab.max/4) {
// keep table at most 3/4 full:
@@ -144,7 +143,7 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
newtab.max *= 3;
}
- newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1);
+ newtab.key = runtime_mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);
newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
for(i=0; i<fintab.max; i++) {
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index f2703ab0263..27fc3cdcc4d 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -2,28 +2,65 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Garbage collector -- step 0.
-//
-// Stop the world, mark and sweep garbage collector.
-// NOT INTENDED FOR PRODUCTION USE.
-//
-// A mark and sweep collector provides a way to exercise
-// and test the memory allocator and the stack walking machinery
-// without also needing to get reference counting
-// exactly right.
+// Garbage collector.
#include "runtime.h"
#include "malloc.h"
enum {
- Debug = 0
+ Debug = 0,
+ UseCas = 1,
+ PtrSize = sizeof(void*),
+
+ // Four bits per word (see #defines below).
+ wordsPerBitmapWord = sizeof(void*)*8/4,
+ bitShift = sizeof(void*)*8/4,
};
-typedef struct BlockList BlockList;
-struct BlockList
+// Bits in per-word bitmap.
+// #defines because enum might not be able to hold the values.
+//
+// Each word in the bitmap describes wordsPerBitmapWord words
+// of heap memory. There are 4 bitmap bits dedicated to each heap word,
+// so on a 64-bit system there is one bitmap word per 16 heap words.
+// The bits in the word are packed together by type first, then by
+// heap location, so each 64-bit bitmap word consists of, from top to bottom,
+// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
+// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
+// This layout makes it easier to iterate over the bits of a given type.
+//
+// The bitmap starts at mheap.arena_start and extends *backward* from
+// there. On a 64-bit system the off'th word in the arena is tracked by
+// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
+// the only difference is that the divisor is 8.)
+//
+// To pull out the bits corresponding to a given pointer p, we use:
+//
+// off = p - (uintptr*)mheap.arena_start; // word offset
+// b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
+// shift = off % wordsPerBitmapWord
+// bits = *b >> shift;
+// /* then test bits & bitAllocated, bits & bitMarked, etc. */
+//
+#define bitAllocated ((uintptr)1<<(bitShift*0))
+#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
+#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
+#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
+#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
+
+#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+
+static uint64 nlookup;
+static uint64 nsizelookup;
+static uint64 naddrlookup;
+static int32 gctrace;
+
+typedef struct Workbuf Workbuf;
+struct Workbuf
{
- byte *obj;
- uintptr size;
+ Workbuf *next;
+ uintptr nw;
+ byte *w[2048-2];
};
static bool finstarted;
@@ -31,83 +68,265 @@ static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
static Finalizer *finq;
static int32 fingwait;
-static BlockList *bl, *ebl;
static void runfinq(void*);
-
-enum {
- PtrSize = sizeof(void*)
-};
-
+static Workbuf* getempty(Workbuf*);
+static Workbuf* getfull(Workbuf*);
+
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left. Instead of using an explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body. Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
static void
scanblock(byte *b, int64 n)
{
- int32 off;
- void *obj;
- uintptr size;
- uint32 *refp, ref;
+ byte *obj, *arena_start, *p;
void **vp;
- int64 i;
- BlockList *w;
-
- w = bl;
- w->obj = b;
- w->size = n;
- w++;
+ uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+ MSpan *s;
+ PageID k;
+ void **bw, **w, **ew;
+ Workbuf *wbuf;
- while(w > bl) {
- w--;
- b = w->obj;
- n = w->size;
+ // Memory arena parameters.
+ arena_start = runtime_mheap.arena_start;
+
+ wbuf = nil; // current work buffer
+ ew = nil; // end of work buffer
+ bw = nil; // beginning of work buffer
+ w = nil; // current pointer into work buffer
+
+ // Align b to a word boundary.
+ off = (uintptr)b & (PtrSize-1);
+ if(off != 0) {
+ b += PtrSize - off;
+ n -= PtrSize - off;
+ }
+ for(;;) {
+ // Each iteration scans the block b of length n, queueing pointers in
+ // the work buffer.
if(Debug > 1)
runtime_printf("scanblock %p %lld\n", b, (long long) n);
- off = (uint32)(uintptr)b & (PtrSize-1);
- if(off) {
- b += PtrSize - off;
- n -= PtrSize - off;
- }
-
+
vp = (void**)b;
n /= PtrSize;
- for(i=0; i<n; i++) {
- obj = vp[i];
- if(obj == nil)
+ for(i=0; i<(uintptr)n; i++) {
+ obj = (byte*)vp[i];
+
+ // Words outside the arena cannot be pointers.
+ if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)
continue;
- if(runtime_mheap.min <= (byte*)obj && (byte*)obj < runtime_mheap.max) {
- if(runtime_mlookup(obj, (byte**)&obj, &size, nil, &refp)) {
- ref = *refp;
- switch(ref & ~RefFlags) {
- case RefNone:
- if(Debug > 1)
- runtime_printf("found at %p: ", &vp[i]);
- *refp = RefSome | (ref & RefFlags);
- if(!(ref & RefNoPointers)) {
- if(w >= ebl)
- runtime_throw("scanblock: garbage collection stack overflow");
- w->obj = obj;
- w->size = size;
- w++;
- }
- break;
- }
+
+ // obj may be a pointer to a live object.
+ // Try to find the beginning of the object.
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Find bits for this word.
+ off = (uintptr*)obj - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Pointing at the beginning of a block?
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+ goto found;
+
+ // Pointing just past the beginning?
+ // Scan backward a little to find a block boundary.
+ for(j=shift; j-->0; ) {
+ if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
+ obj = (byte*)obj - (shift-j)*PtrSize;
+ shift = j;
+ bits = xbits>>shift;
+ goto found;
}
}
+
+ // Otherwise consult span table to find beginning.
+ // (Manually inlined copy of MHeap_LookupMaybe.)
+ nlookup++;
+ naddrlookup++;
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime_mheap.map[x];
+ if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+ continue;
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ continue;
+ size = runtime_class_to_size[s->sizeclass];
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ found:
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // If not allocated or already marked, done.
+ if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+ continue;
+ *bitp |= bitMarked<<shift;
+
+ // If object has no pointers, don't need to scan further.
+ if((bits & bitNoPointers) != 0)
+ continue;
+
+ // If buffer is full, get a new one.
+ if(w >= ew) {
+ wbuf = getempty(wbuf);
+ bw = (void**)wbuf->w;
+ w = bw;
+ ew = bw + nelem(wbuf->w);
+ }
+ *w++ = obj;
}
+
+ // Done scanning [b, b+n). Prepare for the next iteration of
+ // the loop by setting b and n to the parameters for the next block.
+
+ // Fetch b from the work buffers.
+ if(w <= bw) {
+ // Emptied our buffer: refill.
+ wbuf = getfull(wbuf);
+ if(wbuf == nil)
+ break;
+ bw = (void**)wbuf->w;
+ ew = (void**)(wbuf->w + nelem(wbuf->w));
+ w = bw+wbuf->nw;
+ }
+ b = *--w;
+
+ // Figure out n = size of b. Start by loading bits for b.
+ off = (uintptr*)b - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Might be small; look for nearby block boundary.
+ // A block boundary is marked by either bitBlockBoundary
+ // or bitAllocated being set (see notes near their definition).
+ enum {
+ boundary = bitBlockBoundary|bitAllocated
+ };
+ // Look for a block boundary both after and before b
+ // in the same bitmap word.
+ //
+ // A block boundary j words after b is indicated by
+ // bits>>j & boundary
+ // assuming shift+j < bitShift. (If shift+j >= bitShift then
+ // we'll be bleeding other bit types like bitMarked into our test.)
+ // Instead of inserting the conditional shift+j < bitShift into the loop,
+ // we can let j range from 1 to bitShift as long as we first
+ // apply a mask to keep only the bits corresponding
+ // to shift+j < bitShift aka j < bitShift-shift.
+ bits &= (boundary<<(bitShift-shift)) - boundary;
+
+ // A block boundary j words before b is indicated by
+ // xbits>>(shift-j) & boundary
+ // (assuming shift >= j). There is no cleverness here
+ // avoid the test, because when j gets too large the shift
+ // turns negative, which is undefined in C.
+
+ for(j=1; j<bitShift; j++) {
+ if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
+ n = j*PtrSize;
+ goto scan;
+ }
+ }
+
+ // Fall back to asking span about size class.
+ // (Manually inlined copy of MHeap_Lookup.)
+ nlookup++;
+ nsizelookup++;
+ x = (uintptr)b>>PageShift;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime_mheap.map[x];
+ if(s->sizeclass == 0)
+ n = s->npages<<PageShift;
+ else
+ n = runtime_class_to_size[s->sizeclass];
+ scan:;
+ }
+}
+
+static struct {
+ Workbuf *full;
+ Workbuf *empty;
+ byte *chunk;
+ uintptr nchunk;
+} work;
+
+// Get an empty work buffer off the work.empty list,
+// allocating new buffers as needed.
+static Workbuf*
+getempty(Workbuf *b)
+{
+ if(b != nil) {
+ b->nw = nelem(b->w);
+ b->next = work.full;
+ work.full = b;
+ }
+ b = work.empty;
+ if(b != nil) {
+ work.empty = b->next;
+ return b;
+ }
+
+ if(work.nchunk < sizeof *b) {
+ work.nchunk = 1<<20;
+ work.chunk = runtime_SysAlloc(work.nchunk);
}
+ b = (Workbuf*)work.chunk;
+ work.chunk += sizeof *b;
+ work.nchunk -= sizeof *b;
+ return b;
}
+// Get a full work buffer off the work.full list, or return nil.
+static Workbuf*
+getfull(Workbuf *b)
+{
+ if(b != nil) {
+ b->nw = 0;
+ b->next = work.empty;
+ work.empty = b;
+ }
+ b = work.full;
+ if(b != nil)
+ work.full = b->next;
+ return b;
+}
+
+// Scanstack calls scanblock on each of gp's stack segments.
static void
markfin(void *v)
{
uintptr size;
- uint32 *refp;
size = 0;
- refp = nil;
- if(!runtime_mlookup(v, (byte**)&v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
+ if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
runtime_throw("mark - finalizer inconsistency");
-
+
// do not mark the finalizer block itself. just mark the things it points at.
scanblock(v, size);
}
@@ -131,32 +350,12 @@ __go_register_gc_roots (struct root_list* r)
roots = r;
}
+// Mark
static void
mark(void)
{
- uintptr blsize, nobj;
struct root_list *pl;
- // Figure out how big an object stack we need.
- // Get a new one if we need more than we have
- // or we need significantly less than we have.
- nobj = mstats.heap_objects;
- if(nobj > (uintptr)(ebl - bl) || nobj < (uintptr)(ebl-bl)/4) {
- if(bl != nil)
- runtime_SysFree(bl, (byte*)ebl - (byte*)bl);
-
- // While we're allocated a new object stack,
- // add 20% headroom and also round up to
- // the nearest page boundary, since mmap
- // will anyway.
- nobj = nobj * 12/10;
- blsize = nobj * sizeof *bl;
- blsize = (blsize + 4095) & ~4095;
- nobj = blsize / sizeof *bl;
- bl = runtime_SysAlloc(blsize);
- ebl = bl + nobj;
- }
-
for(pl = roots; pl != nil; pl = pl->next) {
struct root* pr = &pl->roots[0];
while(1) {
@@ -179,97 +378,85 @@ mark(void)
runtime_walkfintab(markfin, scanblock);
}
-// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome
+// Sweep frees or calls finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
static void
-sweepspan(MSpan *s)
+sweep(void)
{
- int32 n, npages, size;
+ MSpan *s;
+ int32 cl, n, npages;
+ uintptr size;
byte *p;
- uint32 ref, *gcrefp, *gcrefep;
MCache *c;
Finalizer *f;
- p = (byte*)(s->start << PageShift);
- if(s->sizeclass == 0) {
- // Large block.
- ref = s->gcref0;
- switch(ref & ~(RefFlags^RefHasFinalizer)) {
- case RefNone:
- // Free large object.
- mstats.alloc -= s->npages<<PageShift;
- mstats.nfree++;
- runtime_memclr(p, s->npages<<PageShift);
- if(ref & RefProfiled)
- runtime_MProf_Free(p, s->npages<<PageShift);
- s->gcref0 = RefFree;
- runtime_MHeap_Free(&runtime_mheap, s, 1);
- break;
- case RefNone|RefHasFinalizer:
- f = runtime_getfinalizer(p, 1);
- if(f == nil)
- runtime_throw("finalizer inconsistency");
- f->arg = p;
- f->next = finq;
- finq = f;
- ref &= ~RefHasFinalizer;
- // fall through
- case RefSome:
- case RefSome|RefHasFinalizer:
- s->gcref0 = RefNone | (ref&RefFlags);
- break;
+ for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
+ if(s->state != MSpanInUse)
+ continue;
+
+ p = (byte*)(s->start << PageShift);
+ cl = s->sizeclass;
+ if(cl == 0) {
+ size = s->npages<<PageShift;
+ n = 1;
+ } else {
+ // Chunk full of small blocks.
+ size = runtime_class_to_size[cl];
+ npages = runtime_class_to_allocnpages[cl];
+ n = (npages << PageShift) / size;
}
- return;
- }
+
+ // sweep through n objects of given size starting at p.
+ for(; n > 0; n--, p += size) {
+ uintptr off, *bitp, shift, bits;
- // Chunk full of small blocks.
- runtime_MGetSizeClassInfo(s->sizeclass, &size, &npages, &n);
- gcrefp = s->gcref;
- gcrefep = s->gcref + n;
- for(; gcrefp < gcrefep; gcrefp++, p += size) {
- ref = *gcrefp;
- if(ref < RefNone) // RefFree or RefStack
- continue;
- switch(ref & ~(RefFlags^RefHasFinalizer)) {
- case RefNone:
- // Free small object.
- if(ref & RefProfiled)
+ off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ bits = *bitp>>shift;
+
+ if((bits & bitAllocated) == 0)
+ continue;
+
+ if((bits & bitMarked) != 0) {
+ *bitp &= ~(bitMarked<<shift);
+ continue;
+ }
+
+ if((bits & bitSpecial) != 0) {
+ // Special means it has a finalizer or is being profiled.
+ f = runtime_getfinalizer(p, 1);
+ if(f != nil) {
+ f->arg = p;
+ f->next = finq;
+ finq = f;
+ continue;
+ }
runtime_MProf_Free(p, size);
- *gcrefp = RefFree;
- c = m->mcache;
- if(size > (int32)sizeof(uintptr))
- ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
+ }
+
+ // Mark freed; restore block boundary bit.
+ *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+
+ if(s->sizeclass == 0) {
+ // Free large span.
+ runtime_unmarkspan(p, 1<<PageShift);
+ *(uintptr*)p = 1; // needs zeroing
+ runtime_MHeap_Free(&runtime_mheap, s, 1);
+ } else {
+ // Free small object.
+ c = m->mcache;
+ if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
+ mstats.by_size[s->sizeclass].nfree++;
+ runtime_MCache_Free(c, p, s->sizeclass, size);
+ }
mstats.alloc -= size;
mstats.nfree++;
- mstats.by_size[s->sizeclass].nfree++;
- runtime_MCache_Free(c, p, s->sizeclass, size);
- break;
- case RefNone|RefHasFinalizer:
- f = runtime_getfinalizer(p, 1);
- if(f == nil)
- runtime_throw("finalizer inconsistency");
- f->arg = p;
- f->next = finq;
- finq = f;
- ref &= ~RefHasFinalizer;
- // fall through
- case RefSome:
- case RefSome|RefHasFinalizer:
- *gcrefp = RefNone | (ref&RefFlags);
- break;
}
}
}
-static void
-sweep(void)
-{
- MSpan *s;
-
- for(s = runtime_mheap.allspans; s != nil; s = s->allnext)
- if(s->state == MSpanInUse)
- sweepspan(s);
-}
-
static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
// Initialized from $GOGC. GOGC=off means no gc.
@@ -286,7 +473,8 @@ static int32 gcpercent = -2;
void
runtime_gc(int32 force __attribute__ ((unused)))
{
- int64 t0, t1;
+ int64 t0, t1, t2, t3;
+ uint64 heap0, heap1, obj0, obj1;
char *p;
Finalizer *fp;
@@ -309,29 +497,65 @@ runtime_gc(int32 force __attribute__ ((unused)))
gcpercent = -1;
else
gcpercent = runtime_atoi(p);
+
+ p = runtime_getenv("GOGCTRACE");
+ if(p != nil)
+ gctrace = runtime_atoi(p);
}
if(gcpercent < 0)
return;
pthread_mutex_lock(&finqlock);
pthread_mutex_lock(&gcsema);
- m->locks++; // disable gc during the mallocs in newproc
+ if(!force && mstats.heap_alloc < mstats.next_gc) {
+ pthread_mutex_unlock(&gcsema);
+ pthread_mutex_unlock(&finqlock);
+ return;
+ }
+
t0 = runtime_nanotime();
+ nlookup = 0;
+ nsizelookup = 0;
+ naddrlookup = 0;
+
+ m->gcing = 1;
runtime_stoptheworld();
- if(force || mstats.heap_alloc >= mstats.next_gc) {
- __go_cachestats();
- mark();
- sweep();
- __go_stealcache();
- mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
- }
+ if(runtime_mheap.Lock.key != 0)
+ runtime_throw("runtime_mheap locked during gc");
+ __go_cachestats();
+ heap0 = mstats.heap_alloc;
+ obj0 = mstats.nmalloc - mstats.nfree;
+
+ mark();
t1 = runtime_nanotime();
+ sweep();
+ t2 = runtime_nanotime();
+ __go_stealcache();
+
+ mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
+ m->gcing = 0;
+
+ m->locks++; // disable gc during the mallocs in newproc
+
+ heap1 = mstats.heap_alloc;
+ obj1 = mstats.nmalloc - mstats.nfree;
+
+ t3 = runtime_nanotime();
+ mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
+ mstats.pause_total_ns += t3 - t0;
mstats.numgc++;
- mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0;
- mstats.pause_total_ns += t1 - t0;
if(mstats.debuggc)
- runtime_printf("pause %llu\n", (unsigned long long)t1-t0);
+ runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
+
+ if(gctrace) {
+ runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
+ mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
+ (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
+ (unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
+ (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);
+ }
+
pthread_mutex_unlock(&gcsema);
runtime_starttheworld();
@@ -350,6 +574,9 @@ runtime_gc(int32 force __attribute__ ((unused)))
}
m->locks--;
pthread_mutex_unlock(&finqlock);
+
+ if(gctrace > 1 && !force)
+ runtime_gc(1);
}
static void
@@ -385,6 +612,202 @@ runfinq(void* dummy)
}
}
+#define runtime_gomaxprocs 2
+
+// mark the block at v of size n as allocated.
+// If noptr is true, mark it as having no pointers.
+void
+runtime_markallocated(void *v, uintptr n, bool noptr)
+{
+ uintptr *b, obits, bits, off, shift;
+
+ // if(0)
+ // runtime_printf("markallocated %p+%p\n", v, n);
+
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ runtime_throw("markallocated: bad pointer");
+
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+
+ for(;;) {
+ obits = *b;
+ bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
+ if(noptr)
+ bits |= bitNoPointers<<shift;
+ if(runtime_gomaxprocs == 1) {
+ *b = bits;
+ break;
+ } else {
+ // gomaxprocs > 1: use atomic op
+ if(runtime_casp((void**)b, (void*)obits, (void*)bits))
+ break;
+ }
+ }
+}
+
+// mark the block at v of size n as freed.
+void
+runtime_markfreed(void *v, uintptr n)
+{
+ uintptr *b, obits, bits, off, shift;
+
+ // if(0)
+ // runtime_printf("markallocated %p+%p\n", v, n);
+
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ runtime_throw("markallocated: bad pointer");
+
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+
+ for(;;) {
+ obits = *b;
+ bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ if(runtime_gomaxprocs == 1) {
+ *b = bits;
+ break;
+ } else {
+ // gomaxprocs > 1: use atomic op
+ if(runtime_casp((void**)b, (void*)obits, (void*)bits))
+ break;
+ }
+ }
+}
+
+// check that the block at v of size n is marked freed.
+void
+runtime_checkfreed(void *v, uintptr n)
+{
+ uintptr *b, bits, off, shift;
+
+ if(!runtime_checking)
+ return;
+
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ return; // not allocated, so okay
+
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+
+ bits = *b>>shift;
+ if((bits & bitAllocated) != 0) {
+ runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
+ v, (void*)n, (void*)off, (void*)(bits & bitMask));
+ runtime_throw("checkfreed: not freed");
+ }
+}
+
+// mark the span of memory at v as having n blocks of the given size.
+// if leftover is true, there is left over space at the end of the span.
+void
+runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
+{
+ uintptr *b, off, shift;
+ byte *p;
+
+ if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ runtime_throw("markspan: bad pointer");
+
+ p = v;
+ if(leftover) // mark a boundary just past end of last block too
+ n++;
+ for(; n-- > 0; p += size) {
+ // Okay to use non-atomic ops here, because we control
+ // the entire span, and each bitmap word has bits for only
+ // one span, so no other goroutines are changing these
+ // bitmap words.
+ off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+ }
+}
+
+// unmark the span of memory at v of length n bytes.
+void
+runtime_unmarkspan(void *v, uintptr n)
+{
+ uintptr *p, *b, off;
+
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ runtime_throw("markspan: bad pointer");
+
+ p = v;
+ off = p - (uintptr*)runtime_mheap.arena_start; // word offset
+ if(off % wordsPerBitmapWord != 0)
+ runtime_throw("markspan: unaligned pointer");
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ n /= PtrSize;
+ if(n%wordsPerBitmapWord != 0)
+ runtime_throw("unmarkspan: unaligned length");
+ // Okay to use non-atomic ops here, because we control
+ // the entire span, and each bitmap word has bits for only
+ // one span, so no other goroutines are changing these
+ // bitmap words.
+ n /= wordsPerBitmapWord;
+ while(n-- > 0)
+ *b-- = 0;
+}
+
+bool
+runtime_blockspecial(void *v)
+{
+ uintptr *b, off, shift;
+
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+
+ return (*b & (bitSpecial<<shift)) != 0;
+}
+
+void
+runtime_setblockspecial(void *v)
+{
+ uintptr *b, off, shift, bits, obits;
+
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+
+ for(;;) {
+ obits = *b;
+ bits = obits | (bitSpecial<<shift);
+ if(runtime_gomaxprocs == 1) {
+ *b = bits;
+ break;
+ } else {
+ // gomaxprocs > 1: use atomic op
+ if(runtime_casp((void**)b, (void*)obits, (void*)bits))
+ break;
+ }
+ }
+}
+
+void
+runtime_MHeap_MapBits(MHeap *h)
+{
+ // Caller has added extra mappings to the arena.
+ // Add extra mappings of bitmap words as needed.
+ // We allocate extra bitmap pieces in chunks of bitmapChunk.
+ enum {
+ bitmapChunk = 8192
+ };
+ uintptr n;
+
+ n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
+ n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
+ if(h->bitmap_mapped >= n)
+ return;
+
+ runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
+ h->bitmap_mapped = n;
+}
+
void
__go_enable_gc()
{
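
To make the bitmap layout described at the top of mgc0.c concrete, here is a small self-contained sketch of the off/b/shift lookup from that comment, using the same constants; it returns the bitmap word shifted so the low four bits describe the heap word at p.

#include <stdint.h>

enum {
	wordsPerBitmapWord = sizeof(void*)*8/4,   /* 16 on 64-bit, 8 on 32-bit */
	bitShift           = sizeof(void*)*8/4
};
#define bitAllocated ((uintptr_t)1<<(bitShift*0))
#define bitMarked    ((uintptr_t)1<<(bitShift*2))

/* Return the bitmap bits for the heap word at p, given the arena start. */
static uintptr_t
heap_bits(void *p, void *arena_start)
{
	uintptr_t off   = (uintptr_t*)p - (uintptr_t*)arena_start;       /* word offset */
	uintptr_t *b    = (uintptr_t*)arena_start - off/wordsPerBitmapWord - 1;
	uintptr_t shift = off % wordsPerBitmapWord;
	return *b >> shift;   /* test with bitAllocated, bitMarked, ... */
}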
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index 52c6d8c1baa..b36df258818 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -42,7 +42,6 @@ runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr))
runtime_initlock(h);
runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h);
runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil);
- runtime_MHeapMap_Init(&h->map, alloc);
// h->mapcache needs no init
for(i=0; i<nelem(h->free); i++)
runtime_MSpanList_Init(&h->free[i]);
@@ -80,6 +79,7 @@ MHeap_AllocLocked(MHeap *h, uintptr npage, int32 sizeclass)
{
uintptr n;
MSpan *s, *t;
+ PageID p;
// Try in fixed-size lists up to max.
for(n=npage; n < nelem(h->free); n++) {
@@ -113,18 +113,29 @@ HaveSpan:
mstats.mspan_sys = h->spanalloc.sys;
runtime_MSpan_Init(t, s->start + npage, s->npages - npage);
s->npages = npage;
- runtime_MHeapMap_Set(&h->map, t->start - 1, s);
- runtime_MHeapMap_Set(&h->map, t->start, t);
- runtime_MHeapMap_Set(&h->map, t->start + t->npages - 1, t);
+ p = t->start;
+ if(sizeof(void*) == 8)
+ p -= ((uintptr)h->arena_start>>PageShift);
+ if(p > 0)
+ h->map[p-1] = s;
+ h->map[p] = t;
+ h->map[p+t->npages-1] = t;
+ *(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark
t->state = MSpanInUse;
MHeap_FreeLocked(h, t);
}
+ if(*(uintptr*)(s->start<<PageShift) != 0)
+ runtime_memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
+
// Record span info, because gc needs to be
// able to map interior pointer to containing span.
s->sizeclass = sizeclass;
+ p = s->start;
+ if(sizeof(void*) == 8)
+ p -= ((uintptr)h->arena_start>>PageShift);
for(n=0; n<npage; n++)
- runtime_MHeapMap_Set(&h->map, s->start+n, s);
+ h->map[p+n] = s;
return s;
}
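
The "needs zeroing" lines above rely on a convention for free spans: while a span sits on a free list, the first word of its memory acts as a dirty mark, so the allocator clears a span only when that word is nonzero, and splitting or coalescing copies or merges the mark. A hedged sketch of the allocation-side check, using standard types instead of the runtime's (the patch itself calls runtime_memclr rather than memset):

#include <stdint.h>
#include <string.h>

/* Illustrative only: clear a span's memory only when its leading word
 * (the "needs zeroing" mark) says it may contain stale data. */
static void
zero_span_if_needed(void *base, size_t len)
{
	if (*(uintptr_t *)base != 0)
		memset(base, 0, len);
}
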
@@ -162,6 +173,7 @@ MHeap_Grow(MHeap *h, uintptr npage)
uintptr ask;
void *v;
MSpan *s;
+ PageID p;
// Ask for a big chunk, to reduce the number of mappings
// the operating system needs to track; also amortizes
@@ -169,68 +181,72 @@ MHeap_Grow(MHeap *h, uintptr npage)
// Allocate a multiple of 64kB (16 pages).
npage = (npage+15)&~15;
ask = npage<<PageShift;
- if(ask < HeapAllocChunk)
+ if(ask > (uintptr)(h->arena_end - h->arena_used))
+ return false;
+ if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used)
ask = HeapAllocChunk;
- v = runtime_SysAlloc(ask);
+ v = runtime_MHeap_SysAlloc(h, ask);
if(v == nil) {
if(ask > (npage<<PageShift)) {
ask = npage<<PageShift;
- v = runtime_SysAlloc(ask);
+ v = runtime_MHeap_SysAlloc(h, ask);
}
if(v == nil)
return false;
}
mstats.heap_sys += ask;
- if((byte*)v < h->min || h->min == nil)
- h->min = v;
- if((byte*)v+ask > h->max)
- h->max = (byte*)v+ask;
-
- // NOTE(rsc): In tcmalloc, if we've accumulated enough
- // system allocations, the heap map gets entirely allocated
- // in 32-bit mode. (In 64-bit mode that's not practical.)
- if(!runtime_MHeapMap_Preallocate(&h->map, ((uintptr)v>>PageShift) - 1, (ask>>PageShift) + 2)) {
- runtime_SysFree(v, ask);
- return false;
- }
-
// Create a fake "in use" span and free it, so that the
// right coalescing happens.
s = runtime_FixAlloc_Alloc(&h->spanalloc);
mstats.mspan_inuse = h->spanalloc.inuse;
mstats.mspan_sys = h->spanalloc.sys;
runtime_MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
- runtime_MHeapMap_Set(&h->map, s->start, s);
- runtime_MHeapMap_Set(&h->map, s->start + s->npages - 1, s);
+ p = s->start;
+ if(sizeof(void*) == 8)
+ p -= ((uintptr)h->arena_start>>PageShift);
+ h->map[p] = s;
+ h->map[p + s->npages - 1] = s;
s->state = MSpanInUse;
MHeap_FreeLocked(h, s);
return true;
}
-// Look up the span at the given page number.
-// Page number is guaranteed to be in map
+// Look up the span at the given address.
+// Address is guaranteed to be in map
// and is guaranteed to be start or end of span.
MSpan*
-runtime_MHeap_Lookup(MHeap *h, PageID p)
+runtime_MHeap_Lookup(MHeap *h, void *v)
{
- return runtime_MHeapMap_Get(&h->map, p);
+ uintptr p;
+
+ p = (uintptr)v;
+ if(sizeof(void*) == 8)
+ p -= (uintptr)h->arena_start;
+ return h->map[p >> PageShift];
}
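
The lookup above indexes h->map, a flat array keyed by page number: on 64-bit targets the index is taken relative to arena_start (the map covers just the reserved arena), while on 32-bit targets the raw address's page number indexes the map directly. A sketch of that index computation with illustrative names:

#include <stdint.h>

/* Illustrative only: the page index used for h->map lookups.
 * page_shift plays the role of the runtime's PageShift. */
static uintptr_t
page_index(void *v, void *arena_start, unsigned page_shift)
{
	uintptr_t p = (uintptr_t)v;
	if (sizeof(void *) == 8)
		p -= (uintptr_t)arena_start;  /* 64-bit: index relative to the arena */
	return p >> page_shift;
}
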
-// Look up the span at the given page number.
-// Page number is *not* guaranteed to be in map
+// Look up the span at the given address.
+// Address is *not* guaranteed to be in map
// and may be anywhere in the span.
// Map entries for the middle of a span are only
// valid for allocated spans. Free spans may have
// other garbage in their middles, so we have to
// check for that.
MSpan*
-runtime_MHeap_LookupMaybe(MHeap *h, PageID p)
+runtime_MHeap_LookupMaybe(MHeap *h, void *v)
{
MSpan *s;
+ PageID p, q;
- s = runtime_MHeapMap_GetMaybe(&h->map, p);
+ if((byte*)v < h->arena_start || (byte*)v >= h->arena_used)
+ return nil;
+ p = (uintptr)v>>PageShift;
+ q = p;
+ if(sizeof(void*) == 8)
+ q -= (uintptr)h->arena_start >> PageShift;
+ s = h->map[q];
if(s == nil || p < s->start || p - s->start >= s->npages)
return nil;
if(s->state != MSpanInUse)
@@ -259,7 +275,9 @@ runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct)
static void
MHeap_FreeLocked(MHeap *h, MSpan *s)
{
+ uintptr *sp, *tp;
MSpan *t;
+ PageID p;
if(s->state != MSpanInUse || s->ref != 0) {
// runtime_printf("MHeap_FreeLocked - span %p ptr %p state %d ref %d\n", s, s->start<<PageShift, s->state, s->ref);
@@ -267,21 +285,30 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
}
s->state = MSpanFree;
runtime_MSpanList_Remove(s);
+ sp = (uintptr*)(s->start<<PageShift);
// Coalesce with earlier, later spans.
- if((t = runtime_MHeapMap_Get(&h->map, s->start - 1)) != nil && t->state != MSpanInUse) {
+ p = s->start;
+ if(sizeof(void*) == 8)
+ p -= (uintptr)h->arena_start >> PageShift;
+ if(p > 0 && (t = h->map[p-1]) != nil && t->state != MSpanInUse) {
+ tp = (uintptr*)(t->start<<PageShift);
+ *tp |= *sp; // propagate "needs zeroing" mark
s->start = t->start;
s->npages += t->npages;
- runtime_MHeapMap_Set(&h->map, s->start, s);
+ p -= t->npages;
+ h->map[p] = s;
runtime_MSpanList_Remove(t);
t->state = MSpanDead;
runtime_FixAlloc_Free(&h->spanalloc, t);
mstats.mspan_inuse = h->spanalloc.inuse;
mstats.mspan_sys = h->spanalloc.sys;
}
- if((t = runtime_MHeapMap_Get(&h->map, s->start + s->npages)) != nil && t->state != MSpanInUse) {
+ if(p+s->npages < nelem(h->map) && (t = h->map[p+s->npages]) != nil && t->state != MSpanInUse) {
+ tp = (uintptr*)(t->start<<PageShift);
+ *sp |= *tp; // propagate "needs zeroing" mark
s->npages += t->npages;
- runtime_MHeapMap_Set(&h->map, s->start + s->npages - 1, s);
+ h->map[p + s->npages - 1] = s;
runtime_MSpanList_Remove(t);
t->state = MSpanDead;
runtime_FixAlloc_Free(&h->spanalloc, t);
@@ -341,10 +368,14 @@ runtime_MSpanList_IsEmpty(MSpan *list)
void
runtime_MSpanList_Insert(MSpan *list, MSpan *span)
{
- if(span->next != nil || span->prev != nil)
+ if(span->next != nil || span->prev != nil) {
+ // runtime_printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
runtime_throw("MSpanList_Insert");
+ }
span->next = list->next;
span->prev = list;
span->next->prev = span;
span->prev->next = span;
}
+
+
diff --git a/libgo/runtime/mheapmap32.c b/libgo/runtime/mheapmap32.c
deleted file mode 100644
index 547c602fe3f..00000000000
--- a/libgo/runtime/mheapmap32.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Heap map, 32-bit version
-// See malloc.h and mheap.c for overview.
-
-#include "runtime.h"
-#include "malloc.h"
-
-#if __SIZEOF_POINTER__ == 4
-
-// 3-level radix tree mapping page ids to Span*.
-void
-runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr))
-{
- m->allocator = allocator;
-}
-
-MSpan*
-runtime_MHeapMap_Get(MHeapMap *m, PageID k)
-{
- int32 i1, i2;
-
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Get");
-
- return m->p[i1]->s[i2];
-}
-
-MSpan*
-runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k)
-{
- int32 i1, i2;
- MHeapMapNode2 *p2;
-
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Get");
-
- p2 = m->p[i1];
- if(p2 == nil)
- return nil;
- return p2->s[i2];
-}
-
-void
-runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *s)
-{
- int32 i1, i2;
-
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Set");
-
- m->p[i1]->s[i2] = s;
-}
-
-// Allocate the storage required for entries [k, k+1, ..., k+len-1]
-// so that Get and Set calls need not check for nil pointers.
-bool
-runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr len)
-{
- uintptr end;
- int32 i1;
- MHeapMapNode2 *p2;
-
- end = k+len;
- while(k < end) {
- if((k >> MHeapMap_TotalBits) != 0)
- return false;
- i1 = (k >> MHeapMap_Level2Bits) & MHeapMap_Level1Mask;
-
- // first-level pointer
- if(m->p[i1] == nil) {
- p2 = m->allocator(sizeof *p2);
- if(p2 == nil)
- return false;
- mstats.heapmap_sys += sizeof *p2;
- m->p[i1] = p2;
- }
-
- // advance key past this leaf node
- k = ((k >> MHeapMap_Level2Bits) + 1) << MHeapMap_Level2Bits;
- }
- return true;
-}
-
-#endif /* __SIZEOF_POINTER__ == 4 */
diff --git a/libgo/runtime/mheapmap32.h b/libgo/runtime/mheapmap32.h
deleted file mode 100644
index 2861624690f..00000000000
--- a/libgo/runtime/mheapmap32.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Free(v) must be able to determine the MSpan containing v.
-// The MHeapMap is a 2-level radix tree mapping page numbers to MSpans.
-
-typedef struct MHeapMapNode2 MHeapMapNode2;
-
-enum
-{
- // 32 bit address - 12 bit page size = 20 bits to map
- MHeapMap_Level1Bits = 10,
- MHeapMap_Level2Bits = 10,
-
- MHeapMap_TotalBits =
- MHeapMap_Level1Bits +
- MHeapMap_Level2Bits,
-
- MHeapMap_Level1Mask = (1<<MHeapMap_Level1Bits) - 1,
- MHeapMap_Level2Mask = (1<<MHeapMap_Level2Bits) - 1,
-};
-
-struct MHeapMap
-{
- void *(*allocator)(uintptr);
- MHeapMapNode2 *p[1<<MHeapMap_Level1Bits];
-};
-
-struct MHeapMapNode2
-{
- MSpan *s[1<<MHeapMap_Level2Bits];
-};
-
-void runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr));
-bool runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr npages);
-MSpan* runtime_MHeapMap_Get(MHeapMap *m, PageID k);
-MSpan* runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k);
-void runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *v);
-
-
diff --git a/libgo/runtime/mheapmap64.c b/libgo/runtime/mheapmap64.c
deleted file mode 100644
index d6305953ad5..00000000000
--- a/libgo/runtime/mheapmap64.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Heap map, 64-bit version
-// See malloc.h and mheap.c for overview.
-
-#include "runtime.h"
-#include "malloc.h"
-
-#if __SIZEOF_POINTER__ == 8
-
-// 3-level radix tree mapping page ids to Span*.
-void
-runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr))
-{
- m->allocator = allocator;
-}
-
-MSpan*
-runtime_MHeapMap_Get(MHeapMap *m, PageID k)
-{
- int32 i1, i2, i3;
-
- i3 = k & MHeapMap_Level3Mask;
- k >>= MHeapMap_Level3Bits;
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Get");
-
- return m->p[i1]->p[i2]->s[i3];
-}
-
-MSpan*
-runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k)
-{
- int32 i1, i2, i3;
- MHeapMapNode2 *p2;
- MHeapMapNode3 *p3;
-
- i3 = k & MHeapMap_Level3Mask;
- k >>= MHeapMap_Level3Bits;
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Get");
-
- p2 = m->p[i1];
- if(p2 == nil)
- return nil;
- p3 = p2->p[i2];
- if(p3 == nil)
- return nil;
- return p3->s[i3];
-}
-
-void
-runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *s)
-{
- int32 i1, i2, i3;
-
- i3 = k & MHeapMap_Level3Mask;
- k >>= MHeapMap_Level3Bits;
- i2 = k & MHeapMap_Level2Mask;
- k >>= MHeapMap_Level2Bits;
- i1 = k & MHeapMap_Level1Mask;
- k >>= MHeapMap_Level1Bits;
- if(k != 0)
- runtime_throw("MHeapMap_Set");
-
- m->p[i1]->p[i2]->s[i3] = s;
-}
-
-// Allocate the storage required for entries [k, k+1, ..., k+len-1]
-// so that Get and Set calls need not check for nil pointers.
-bool
-runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr len)
-{
- uintptr end;
- int32 i1, i2;
- MHeapMapNode2 *p2;
- MHeapMapNode3 *p3;
-
- end = k+len;
- while(k < end) {
- if((k >> MHeapMap_TotalBits) != 0)
- return false;
- i2 = (k >> MHeapMap_Level3Bits) & MHeapMap_Level2Mask;
- i1 = (k >> (MHeapMap_Level3Bits + MHeapMap_Level2Bits)) & MHeapMap_Level1Mask;
-
- // first-level pointer
- if((p2 = m->p[i1]) == nil) {
- p2 = m->allocator(sizeof *p2);
- if(p2 == nil)
- return false;
- mstats.heapmap_sys += sizeof *p2;
- m->p[i1] = p2;
- }
-
- // second-level pointer
- if(p2->p[i2] == nil) {
- p3 = m->allocator(sizeof *p3);
- if(p3 == nil)
- return false;
- mstats.heapmap_sys += sizeof *p3;
- p2->p[i2] = p3;
- }
-
- // advance key past this leaf node
- k = ((k >> MHeapMap_Level3Bits) + 1) << MHeapMap_Level3Bits;
- }
- return true;
-}
-
-#endif /* __SIZEOF_POINTER__ == 8 */
diff --git a/libgo/runtime/mheapmap64.h b/libgo/runtime/mheapmap64.h
deleted file mode 100644
index be304cb2e8b..00000000000
--- a/libgo/runtime/mheapmap64.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Free(v) must be able to determine the MSpan containing v.
-// The MHeapMap is a 3-level radix tree mapping page numbers to MSpans.
-//
-// NOTE(rsc): On a 32-bit platform (= 20-bit page numbers),
-// we can swap in a 2-level radix tree.
-//
-// NOTE(rsc): We use a 3-level tree because tcmalloc does, but
-// having only three levels requires approximately 1 MB per node
-// in the tree, making the minimum map footprint 3 MB.
-// Using a 4-level tree would cut the minimum footprint to 256 kB.
-// On the other hand, it's just virtual address space: most of
-// the memory is never going to be touched, thus never paged in.
-
-typedef struct MHeapMapNode2 MHeapMapNode2;
-typedef struct MHeapMapNode3 MHeapMapNode3;
-
-enum
-{
- // 64 bit address - 12 bit page size = 52 bits to map
- MHeapMap_Level1Bits = 18,
- MHeapMap_Level2Bits = 18,
- MHeapMap_Level3Bits = 16,
-
- MHeapMap_TotalBits =
- MHeapMap_Level1Bits +
- MHeapMap_Level2Bits +
- MHeapMap_Level3Bits,
-
- MHeapMap_Level1Mask = (1<<MHeapMap_Level1Bits) - 1,
- MHeapMap_Level2Mask = (1<<MHeapMap_Level2Bits) - 1,
- MHeapMap_Level3Mask = (1<<MHeapMap_Level3Bits) - 1,
-};
-
-struct MHeapMap
-{
- void *(*allocator)(uintptr);
- MHeapMapNode2 *p[1<<MHeapMap_Level1Bits];
-};
-
-struct MHeapMapNode2
-{
- MHeapMapNode3 *p[1<<MHeapMap_Level2Bits];
-};
-
-struct MHeapMapNode3
-{
- MSpan *s[1<<MHeapMap_Level3Bits];
-};
-
-void runtime_MHeapMap_Init(MHeapMap *m, void *(*allocator)(uintptr));
-bool runtime_MHeapMap_Preallocate(MHeapMap *m, PageID k, uintptr npages);
-MSpan* runtime_MHeapMap_Get(MHeapMap *m, PageID k);
-MSpan* runtime_MHeapMap_GetMaybe(MHeapMap *m, PageID k);
-void runtime_MHeapMap_Set(MHeapMap *m, PageID k, MSpan *v);
-
-
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index 6bd4ef72724..2e147edda02 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -67,7 +67,7 @@ stkbucket(uintptr *stk, int32 nstk)
runtime_mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
return b;
- b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
+ b = runtime_mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
bucketmem += sizeof *b + nstk*sizeof stk[0];
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
b->hash = h;
@@ -134,7 +134,7 @@ setaddrbucket(uintptr addr, Bucket *b)
if(ah->addr == (addr>>20))
goto found;
- ah = runtime_mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
+ ah = runtime_mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
addrmem += sizeof *ah;
ah->next = addrhash[h];
ah->addr = addr>>20;
@@ -142,7 +142,7 @@ setaddrbucket(uintptr addr, Bucket *b)
found:
if((e = addrfree) == nil) {
- e = runtime_mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
+ e = runtime_mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
addrmem += 64*sizeof *e;
for(i=0; i+1<64; i++)
e[i].next = &e[i+1];
diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c
index 8b021a2b6b3..6e82885bab4 100644
--- a/libgo/runtime/msize.c
+++ b/libgo/runtime/msize.c
@@ -57,7 +57,7 @@ runtime_SizeToClass(int32 size)
void
runtime_InitSizes(void)
{
- int32 align, sizeclass, size, osize, nextsize, n;
+ int32 align, sizeclass, size, nextsize, n;
uint32 i;
uintptr allocsize, npages;
@@ -81,8 +81,7 @@ runtime_InitSizes(void)
// the leftover is less than 1/8 of the total,
// so wasted space is at most 12.5%.
allocsize = PageSize;
- osize = size + RefcountOverhead;
- while(allocsize%osize > (allocsize/8))
+ while(allocsize%size > allocsize/8)
allocsize += PageSize;
npages = allocsize >> PageShift;
@@ -93,7 +92,7 @@ runtime_InitSizes(void)
// different sizes.
if(sizeclass > 1
&& (int32)npages == runtime_class_to_allocnpages[sizeclass-1]
- && allocsize/osize == allocsize/(runtime_class_to_size[sizeclass-1]+RefcountOverhead)) {
+ && allocsize/size == allocsize/runtime_class_to_size[sizeclass-1]) {
runtime_class_to_size[sizeclass-1] = size;
continue;
}
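
With the RefcountOverhead term removed, the loop above packs objects of exactly size bytes into allocsize and grows allocsize one page at a time until the unused tail is at most one eighth of it, which is where the "wasted space is at most 12.5%" bound in the comment comes from. A standalone sketch of that loop, assuming 4 KB pages:

#include <stdint.h>

enum { PAGE_SIZE = 4096 };   /* assumed page size, for illustration only */

/* Illustrative only: smallest page-multiple allocation unit whose leftover
 * after packing size-byte objects is at most 1/8 of the unit (size > 0). */
static uintptr_t
alloc_size_for(uintptr_t size)
{
	uintptr_t allocsize = PAGE_SIZE;
	while (allocsize % size > allocsize / 8)
		allocsize += PAGE_SIZE;
	return allocsize;
}

/* e.g. alloc_size_for(448) == 4096, since 4096 % 448 == 64 <= 4096/8 */
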
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 95216e4a5ca..011ba7dab6b 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -185,6 +185,7 @@ void runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64));
#define runtime_mmap mmap
#define runtime_munmap(p, s) munmap((p), (s))
#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
struct __go_func_type;
void reflect_call(const struct __go_func_type *, const void *, _Bool, void **,
diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc
index b5f2954bc8e..7cbd739e51e 100644
--- a/libgo/runtime/sigqueue.goc
+++ b/libgo/runtime/sigqueue.goc
@@ -102,7 +102,7 @@ func Signame(sig int32) (name String) {
s = buf;
}
int32 len = __builtin_strlen(s);
- unsigned char *data = runtime_mallocgc(len, RefNoPointers, 0, 0);
+ unsigned char *data = runtime_mallocgc(len, FlagNoPointers, 0, 0);
__builtin_memcpy(data, s, len);
name.__data = data;
name.__length = len;