/* Copyright 2013-2018 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <skiboot.h>
#include <mem-map.h>
#include <libfdt_env.h>
#include <lock.h>
#include <device.h>
#include <cpu.h>
#include <chip.h>
#include <affinity.h>
#include <types.h>
#include <mem_region.h>
#include <mem_region-malloc.h>

/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */
#ifdef DEBUG
#define POISON_MEM_REGION	1
#else
#define POISON_MEM_REGION	0
#endif
#define POISON_MEM_REGION_WITH	0x99
#define POISON_MEM_REGION_LIMIT	(1 * 1024 * 1024 * 1024)

/* Locking: The mem_region_lock protects the regions list from concurrent
 * updates. Additions to, or removals from, the region list must be done
 * with this lock held. This is typically done when we're establishing
 * the memory & reserved regions.
 *
 * Each region has a lock (region->free_list_lock) to protect the free list
 * from concurrent modification. This lock is used when we're allocating
 * memory out of a specific region.
 *
 * If both locks are needed (eg, __local_alloc, where we need to find a region,
 * then allocate from it), the mem_region_lock must be acquired before (and
 * released after) the per-region lock.
 */
struct lock mem_region_lock = LOCK_UNLOCKED;

static struct list_head regions = LIST_HEAD_INIT(regions);
static struct list_head early_reserves = LIST_HEAD_INIT(early_reserves);

static bool mem_region_init_done = false;
static bool mem_regions_finalised = false;

unsigned long top_of_ram = SKIBOOT_BASE + SKIBOOT_SIZE;

static struct mem_region skiboot_os_reserve = {
	.name		= "ibm,os-reserve",
	.start		= 0,
	.len		= SKIBOOT_BASE,
	.type		= REGION_OS,
};

struct mem_region skiboot_heap = {
	.name		= "ibm,firmware-heap",
	.start		= HEAP_BASE,
	.len		= HEAP_SIZE,
	.type		= REGION_SKIBOOT_HEAP,
};

static struct mem_region skiboot_code_and_text = {
	.name		= "ibm,firmware-code",
	.start		= SKIBOOT_BASE,
	.len		= HEAP_BASE - SKIBOOT_BASE,
	.type		= REGION_SKIBOOT_FIRMWARE,
};

static struct mem_region skiboot_after_heap = {
	.name		= "ibm,firmware-data",
	.start		= HEAP_BASE + HEAP_SIZE,
	.len		= SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
	.type		= REGION_SKIBOOT_FIRMWARE,
};

static struct mem_region skiboot_cpu_stacks = {
	.name		= "ibm,firmware-stacks",
	.start		= CPU_STACKS_BASE,
	.len		= 0, /* TBA */
	.type		= REGION_SKIBOOT_FIRMWARE,
};

static struct mem_region skiboot_mambo_kernel = {
	.name		= "ibm,firmware-mambo-kernel",
	.start		= (unsigned long)KERNEL_LOAD_BASE,
	.len		= KERNEL_LOAD_SIZE,
	.type		= REGION_SKIBOOT_FIRMWARE,
};

static struct mem_region skiboot_mambo_initramfs = {
	.name		= "ibm,firmware-mambo-initramfs",
	.start		= (unsigned long)INITRAMFS_LOAD_BASE,
	.len		= INITRAMFS_LOAD_SIZE,
	.type		= REGION_SKIBOOT_FIRMWARE,
};

struct alloc_hdr {
	bool free : 1;
	bool prev_free : 1;
	bool printed : 1;
	unsigned long num_longs : BITS_PER_LONG-3; /* Including header. */
	const char *location;
};

struct free_hdr {
	struct alloc_hdr hdr;
	struct list_node list;
	/* ... unsigned long tailer; */
};

#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long))
#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1)
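/*
 * Illustrative block layout (derived from the structures above, not
 * normative): every block starts with an alloc_hdr and is measured in
 * longs, including the header. Free blocks additionally carry a list
 * node and a tailer word (the last long of the block, holding a copy of
 * num_longs) so make_free() can walk backwards to coalesce:
 *
 *	allocated: [ alloc_hdr | user data ..................... ]
 *	free:      [ alloc_hdr | list_node | ........... | tailer ]
 *	            <---------------- num_longs ---------------->
 */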
/* Avoid ugly casts. */
static void *region_start(const struct mem_region *region)
{
	return (void *)(unsigned long)region->start;
}

/* Each free block has a tailer, so we can walk backwards. */
static unsigned long *tailer(struct free_hdr *f)
{
	return (unsigned long *)f + f->hdr.num_longs - 1;
}

/* This walks forward to the next hdr (or NULL if at the end). */
static struct alloc_hdr *next_hdr(const struct mem_region *region,
				  const struct alloc_hdr *hdr)
{
	void *next;

	next = ((unsigned long *)hdr + hdr->num_longs);
	if (next >= region_start(region) + region->len)
		next = NULL;
	return next;
}

#if POISON_MEM_REGION == 1
static void mem_poison(struct free_hdr *f)
{
	size_t poison_size = (void *)tailer(f) - (void *)(f + 1);

	/* We only poison up to a limit, as otherwise boot is
	 * kinda slow */
	if (poison_size > POISON_MEM_REGION_LIMIT)
		poison_size = POISON_MEM_REGION_LIMIT;

	memset(f + 1, POISON_MEM_REGION_WITH, poison_size);
}
#endif

/* Creates free block covering entire region. */
static void init_allocatable_region(struct mem_region *region)
{
	struct free_hdr *f = region_start(region);

	assert(region->type == REGION_SKIBOOT_HEAP ||
	       region->type == REGION_MEMORY);
	f->hdr.num_longs = region->len / sizeof(long);
	f->hdr.free = true;
	f->hdr.prev_free = false;
	*tailer(f) = f->hdr.num_longs;
	list_head_init(&region->free_list);
	list_add(&region->free_list, &f->list);
#if POISON_MEM_REGION == 1
	mem_poison(f);
#endif
}

static void make_free(struct mem_region *region, struct free_hdr *f,
		      const char *location, bool skip_poison)
{
	struct alloc_hdr *next;

#if POISON_MEM_REGION == 1
	if (!skip_poison)
		mem_poison(f);
#else
	(void)skip_poison;
#endif

	if (f->hdr.prev_free) {
		struct free_hdr *prev;
		unsigned long *prev_tailer = (unsigned long *)f - 1;

		assert(*prev_tailer);
		prev = (void *)((unsigned long *)f - *prev_tailer);
		assert(prev->hdr.free);
		assert(!prev->hdr.prev_free);

		/* Expand to cover the one we just freed. */
		prev->hdr.num_longs += f->hdr.num_longs;
		f = prev;
	} else {
		f->hdr.free = true;
		f->hdr.location = location;
		list_add(&region->free_list, &f->list);
	}

	/* Fix up tailer. */
	*tailer(f) = f->hdr.num_longs;

	/* If next is free, coalesce it */
	next = next_hdr(region, &f->hdr);
	if (next) {
		next->prev_free = true;
		if (next->free) {
			struct free_hdr *next_free = (void *)next;

			list_del_from(&region->free_list, &next_free->list);
			/* Maximum of one level of recursion */
			make_free(region, next_free, location, true);
		}
	}
}

/* Can we fit this many longs with this alignment in this free block? */
static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset)
{
	*offset = 0;

	while (f->hdr.num_longs >= *offset + longs) {
		size_t addr;

		addr = (unsigned long)f
			+ (*offset + ALLOC_HDR_LONGS) * sizeof(long);
		if ((addr & (align - 1)) == 0)
			return true;

		/* Don't make tiny chunks! */
		if (*offset == 0)
			*offset = ALLOC_MIN_LONGS;
		else
			(*offset)++;
	}
	return false;
}

static void discard_excess(struct mem_region *region,
			   struct alloc_hdr *hdr, size_t alloc_longs,
			   const char *location, bool skip_poison)
{
	/* Do we have excess? */
	if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) {
		struct free_hdr *post;

		/* Set up post block. */
		post = (void *)hdr + alloc_longs * sizeof(long);
		post->hdr.num_longs = hdr->num_longs - alloc_longs;
		post->hdr.prev_free = false;

		/* Trim our block. */
		hdr->num_longs = alloc_longs;

		/* This coalesces as required. */
		make_free(region, post, location, skip_poison);
	}
}
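/*
 * Worked example (illustrative): a 1024-long free block asked for
 * alloc_longs = 10. fits() returns offset 0 (assuming the natural
 * alignment already holds), the first 10 longs become the allocation,
 * and discard_excess() turns the remaining 1014 longs into a new free
 * block via make_free(), which re-coalesces with any free neighbour.
 * If alignment forces a non-zero offset, the skipped prefix is at least
 * ALLOC_MIN_LONGS, so it remains a valid free block in its own right.
 */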
static const char *hdr_location(const struct alloc_hdr *hdr)
{
	/* Corrupt: step carefully! */
	if (is_rodata(hdr->location))
		return hdr->location;
	return "*CORRUPT*";
}

static void bad_header(const struct mem_region *region,
		       const struct alloc_hdr *hdr,
		       const char *during,
		       const char *location)
{
	/* Corrupt: step carefully! */
	if (is_rodata(hdr->location))
		prerror("%p (in %s) %s at %s, previously %s\n",
			hdr - 1, region->name, during, location,
			hdr->location);
	else
		prerror("%p (in %s) %s at %s, previously %p\n",
			hdr - 1, region->name, during, location,
			hdr->location);

	abort();
}

static bool region_is_reservable(struct mem_region *region)
{
	return region->type != REGION_OS;
}

static bool region_is_reserved(struct mem_region *region)
{
	return region->type != REGION_OS && region->type != REGION_MEMORY;
}

void mem_dump_allocs(void)
{
	struct mem_region *region;
	struct alloc_hdr *h, *i;

	prlog(PR_INFO, "Memory regions:\n");
	list_for_each(&regions, region, list) {
		if (!(region->type == REGION_SKIBOOT_HEAP ||
		      region->type == REGION_MEMORY))
			continue;
		prlog(PR_INFO, "  0x%012llx..%012llx : %s\n",
		      (long long)region->start,
		      (long long)(region->start + region->len - 1),
		      region->name);
		if (region->free_list.n.next == NULL) {
			prlog(PR_INFO, "    no allocs\n");
			continue;
		}

		/*
		 * XXX: When dumping the allocation list we coalesce
		 * allocations with the same location and size into a
		 * single line. This is quadratic, but it makes the dump
		 * human-readable and the raw dump sometimes causes the
		 * log buffer to wrap.
		 */
		for (h = region_start(region); h; h = next_hdr(region, h))
			h->printed = false;

		for (h = region_start(region); h; h = next_hdr(region, h)) {
			unsigned long bytes;
			int count = 0;

			if (h->free)
				continue;
			if (h->printed)
				continue;

			for (i = h; i; i = next_hdr(region, i)) {
				if (i->free)
					continue;
				if (i->num_longs != h->num_longs)
					continue;
				if (strcmp(i->location, h->location))
					continue;

				i->printed = true;
				count++;
			}

			bytes = h->num_longs * sizeof(long);
			prlog(PR_NOTICE, " % 8d allocs of 0x%.8lx bytes at %s (total 0x%lx)\n",
			      count, bytes, hdr_location(h), bytes * count);
		}
	}
}

int64_t mem_dump_free(void)
{
	struct mem_region *region;
	struct alloc_hdr *hdr;
	int64_t total_free;
	int64_t region_free;

	total_free = 0;

	prlog(PR_INFO, "Free space in HEAP memory regions:\n");
	list_for_each(&regions, region, list) {
		if (!(region->type == REGION_SKIBOOT_HEAP ||
		      region->type == REGION_MEMORY))
			continue;
		region_free = 0;

		if (region->free_list.n.next == NULL) {
			continue;
		}
		for (hdr = region_start(region); hdr;
		     hdr = next_hdr(region, hdr)) {
			if (!hdr->free)
				continue;

			region_free += hdr->num_longs * sizeof(long);
		}
		prlog(PR_INFO, "Region %s free: %"PRIx64"\n",
		      region->name, region_free);
		total_free += region_free;
	}

	prlog(PR_INFO, "Total free: %"PRIu64"\n", total_free);

	return total_free;
}
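/*
 * Sample (hypothetical) mem_dump_free() output per the format strings
 * above; note the per-region figure prints in hex (PRIx64) while the
 * total prints in decimal (PRIu64):
 *
 *	Free space in HEAP memory regions:
 *	Region ibm,firmware-heap free: c00000
 *	Total free: 12582912
 */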
static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
			 const char *location)
{
	size_t alloc_longs, offset;
	struct free_hdr *f;
	struct alloc_hdr *next;

	/* Align must be power of 2. */
	assert(!((align - 1) & align));

	/* This should be a constant. */
	assert(is_rodata(location));

	/* Unallocatable region? */
	if (!(region->type == REGION_SKIBOOT_HEAP ||
	      region->type == REGION_MEMORY))
		return NULL;

	/* First allocation? */
	if (region->free_list.n.next == NULL)
		init_allocatable_region(region);

	/* Don't do screwy sizes. */
	if (size > region->len)
		return NULL;

	/* Don't do tiny alignments, we deal in long increments. */
	if (align < sizeof(long))
		align = sizeof(long);

	/* Convert size to number of longs, too. */
	alloc_longs = (size + sizeof(long) - 1) / sizeof(long)
		+ ALLOC_HDR_LONGS;

	/* Can't be too small for when we free it, either. */
	if (alloc_longs < ALLOC_MIN_LONGS)
		alloc_longs = ALLOC_MIN_LONGS;

	/* Walk free list. */
	list_for_each(&region->free_list, f, list) {
		/* We may have to skip some to meet alignment. */
		if (fits(f, alloc_longs, align, &offset))
			goto found;
	}

	return NULL;

found:
	assert(f->hdr.free);
	assert(!f->hdr.prev_free);

	/* This block is no longer free. */
	list_del_from(&region->free_list, &f->list);
	f->hdr.free = false;
	f->hdr.location = location;

	next = next_hdr(region, &f->hdr);
	if (next) {
		assert(next->prev_free);
		next->prev_free = false;
	}

	if (offset != 0) {
		struct free_hdr *pre = f;

		f = (void *)f + offset * sizeof(long);
		assert(f >= pre + 1);

		/* Set up new header. */
		f->hdr.num_longs = pre->hdr.num_longs - offset;
		/* f->hdr.prev_free will be set by make_free below. */
		f->hdr.free = false;
		f->hdr.location = location;

		/* Fix up old header. */
		pre->hdr.num_longs = offset;
		pre->hdr.prev_free = false;

		/* This coalesces as required. */
		make_free(region, pre, location, true);
	}

	/* We might be too long; put the rest back. */
	discard_excess(region, &f->hdr, alloc_longs, location, true);

	/* Clear tailer for debugging */
	*tailer(f) = 0;

	/* Their pointer is immediately after header. */
	return &f->hdr + 1;
}

void *mem_alloc(struct mem_region *region, size_t size, size_t align,
		const char *location)
{
	static bool dumped = false;
	void *r;

	assert(lock_held_by_me(&region->free_list_lock));

	r = __mem_alloc(region, size, align, location);
	if (r)
		return r;

	prerror("mem_alloc(0x%lx, 0x%lx, \"%s\", %s) failed !\n",
		size, align, location, region->name);
	if (!dumped) {
		mem_dump_allocs();
		dumped = true;
	}

	return NULL;
}

void mem_free(struct mem_region *region, void *mem, const char *location)
{
	struct alloc_hdr *hdr;

	/* This should be a constant. */
	assert(is_rodata(location));

	assert(lock_held_by_me(&region->free_list_lock));

	/* Freeing NULL is always a noop. */
	if (!mem)
		return;

	/* Your memory is in the region, right? */
	assert(mem >= region_start(region) + sizeof(*hdr));
	assert(mem < region_start(region) + region->len);

	/* Grab header. */
	hdr = mem - sizeof(*hdr);

	if (hdr->free)
		bad_header(region, hdr, "re-freed", location);

	make_free(region, (struct free_hdr *)hdr, location, false);
}

size_t mem_allocated_size(const void *ptr)
{
	const struct alloc_hdr *hdr = ptr - sizeof(*hdr);
	return hdr->num_longs * sizeof(long) - sizeof(struct alloc_hdr);
}
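/*
 * Usage sketch (hypothetical caller): both mem_alloc() and mem_free()
 * assert that the caller holds the region's free_list_lock, so a raw
 * allocation from the skiboot heap looks roughly like:
 *
 *	lock(&skiboot_heap.free_list_lock);
 *	p = mem_alloc(&skiboot_heap, size, sizeof(long), __location__);
 *	...
 *	mem_free(&skiboot_heap, p, __location__);
 *	unlock(&skiboot_heap.free_list_lock);
 *
 * Most code instead uses the malloc()/free() wrappers from
 * mem_region-malloc.h, which take the lock themselves.
 */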
bool mem_resize(struct mem_region *region, void *mem, size_t len,
		const char *location)
{
	struct alloc_hdr *hdr, *next;
	struct free_hdr *f;

	/* This should be a constant. */
	assert(is_rodata(location));

	assert(lock_held_by_me(&region->free_list_lock));

	/* Get header. */
	hdr = mem - sizeof(*hdr);
	if (hdr->free)
		bad_header(region, hdr, "resize", location);

	/* Round up size to multiple of longs. */
	len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long);

	/* Can't be too small for when we free it, either. */
	if (len < ALLOC_MIN_LONGS)
		len = ALLOC_MIN_LONGS;

	/* Shrinking is simple. */
	if (len <= hdr->num_longs) {
		hdr->location = location;
		discard_excess(region, hdr, len, location, false);
		return true;
	}

	/* Check if we can expand. */
	next = next_hdr(region, hdr);
	if (!next || !next->free || hdr->num_longs + next->num_longs < len)
		return false;

	/* OK, it's free and big enough, absorb it. */
	f = (struct free_hdr *)next;
	list_del_from(&region->free_list, &f->list);
	hdr->num_longs += next->num_longs;
	hdr->location = location;

	/* Update next prev_free */
	next = next_hdr(region, &f->hdr);
	if (next) {
		assert(next->prev_free);
		next->prev_free = false;
	}

	/* Clear tailer for debugging */
	*tailer(f) = 0;

	/* Now we might have *too* much. */
	discard_excess(region, hdr, len, location, true);
	return true;
}

bool mem_check(const struct mem_region *region)
{
	size_t frees = 0;
	struct alloc_hdr *hdr, *prev_free = NULL;
	struct free_hdr *f;

	/* Check it's sanely aligned. */
	if (region->start % sizeof(long)) {
		prerror("Region '%s' not sanely aligned (%llx)\n",
			region->name, (unsigned long long)region->start);
		return false;
	}
	if ((long)region->len % sizeof(long)) {
		prerror("Region '%s' not sane length (%llu)\n",
			region->name, (unsigned long long)region->len);
		return false;
	}

	/* Not ours to play with, or empty? Don't do anything. */
	if (!(region->type == REGION_MEMORY ||
	      region->type == REGION_SKIBOOT_HEAP) ||
	    region->free_list.n.next == NULL)
		return true;

	/* Walk linearly. */
	for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
		if (hdr->num_longs < ALLOC_MIN_LONGS) {
			prerror("Region '%s' %s %p (%s) size %zu\n",
				region->name, hdr->free ? "free" : "alloc",
				hdr, hdr_location(hdr),
				hdr->num_longs * sizeof(long));
			return false;
		}
		if ((unsigned long)hdr + hdr->num_longs * sizeof(long) >
		    region->start + region->len) {
			prerror("Region '%s' %s %p (%s) oversize %zu\n",
				region->name, hdr->free ? "free" : "alloc",
				hdr, hdr_location(hdr),
				hdr->num_longs * sizeof(long));
			return false;
		}
		if (hdr->free) {
			if (hdr->prev_free || prev_free) {
				prerror("Region '%s' free %p (%s) has prev_free"
					" %p (%s) %sset?\n",
					region->name, hdr, hdr_location(hdr),
					prev_free,
					prev_free ? hdr_location(prev_free)
					: "NULL",
					hdr->prev_free ? "" : "un");
				return false;
			}
			prev_free = hdr;
			frees ^= (unsigned long)hdr - region->start;
		} else {
			if (hdr->prev_free != (bool)prev_free) {
				prerror("Region '%s' alloc %p (%s) has"
					" prev_free %p %sset?\n",
					region->name, hdr, hdr_location(hdr),
					prev_free,
					hdr->prev_free ? "" : "un");
				return false;
			}
			prev_free = NULL;
		}
	}

	/* Now walk free list. */
	list_for_each(&region->free_list, f, list)
		frees ^= (unsigned long)f - region->start;

	if (frees) {
		prerror("Region '%s' free list and walk do not match!\n",
			region->name);
		return false;
	}
	return true;
}

bool mem_check_all(void)
{
	struct mem_region *r;

	list_for_each(&regions, r, list) {
		if (!mem_check(r))
			return false;
	}

	return true;
}

static struct mem_region *new_region(const char *name, uint64_t start,
				     uint64_t len, struct dt_node *node,
				     enum mem_region_type type)
{
	struct mem_region *region;

	region = malloc(sizeof(*region));
	if (!region)
		return NULL;

	region->name = name;
	region->start = start;
	region->len = len;
	region->node = node;
	region->type = type;
	region->free_list.n.next = NULL;
	init_lock(&region->free_list_lock);

	return region;
}
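/*
 * Illustrative split: given a region covering [0x1000, 0x3000) and a
 * split point of 0x2000, split_region() below allocates a new "tail"
 * region [0x2000, 0x3000) sharing the head's name and node, and trims
 * the head to [0x1000, 0x2000). On allocation failure the head is left
 * untouched and NULL is returned.
 */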
/* We always split regions, so we only have to replace one. */
static struct mem_region *split_region(struct mem_region *head,
				       uint64_t split_at,
				       enum mem_region_type type)
{
	struct mem_region *tail;
	uint64_t end = head->start + head->len;

	tail = new_region(head->name, split_at, end - split_at,
			  head->node, type);
	/* Original region becomes head. */
	if (tail)
		head->len -= tail->len;

	return tail;
}

static bool intersects(const struct mem_region *region, uint64_t addr)
{
	return addr > region->start &&
		addr < region->start + region->len;
}

static bool maybe_split(struct mem_region *r, uint64_t split_at)
{
	struct mem_region *tail;

	if (!intersects(r, split_at))
		return true;

	tail = split_region(r, split_at, r->type);
	if (!tail)
		return false;

	/* Tail add is important: we may need to split again! */
	list_add_tail(&regions, &tail->list);
	return true;
}

static bool overlaps(const struct mem_region *r1, const struct mem_region *r2)
{
	return (r1->start + r1->len > r2->start &&
		r1->start < r2->start + r2->len);
}

static bool contains(const struct mem_region *r1, const struct mem_region *r2)
{
	u64 r1_end = r1->start + r1->len;
	u64 r2_end = r2->start + r2->len;

	return (r1->start <= r2->start && r2_end <= r1_end);
}

static struct mem_region *get_overlap(const struct mem_region *region)
{
	struct mem_region *i;

	list_for_each(&regions, i, list) {
		if (overlaps(region, i))
			return i;
	}
	return NULL;
}

static bool add_region(struct mem_region *region)
{
	struct mem_region *r;

	if (mem_regions_finalised) {
		prerror("MEM: add_region(%s@0x%"PRIx64") called after finalise!\n",
			region->name, region->start);
		return false;
	}

	/* First split any regions which intersect. */
	list_for_each(&regions, r, list) {
		/*
		 * The new region should be fully contained by an existing
		 * one. If it's not then we have a problem where reservations
		 * partially overlap which is probably broken.
		 *
		 * NB: There *might* be situations where this is legitimate,
		 * but the region handling does not currently support this.
		 */
		if (overlaps(r, region) && !contains(r, region)) {
			prerror("MEM: Partial overlap detected between regions:\n");
			prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"] (new)\n",
				region->name, region->start,
				region->start + region->len);
			prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"]\n",
				r->name, r->start, r->start + r->len);
			return false;
		}

		if (!maybe_split(r, region->start) ||
		    !maybe_split(r, region->start + region->len))
			return false;
	}

	/* Now we have only whole overlaps, if any. */
	while ((r = get_overlap(region)) != NULL) {
		assert(r->start == region->start);
		assert(r->len == region->len);
		list_del_from(&regions, &r->list);
		free(r);
	}

	/* Finally, add in our own region. */
	list_add(&regions, &region->list);
	return true;
}

static void mem_reserve(enum mem_region_type type, const char *name,
			uint64_t start, uint64_t len)
{
	struct mem_region *region;
	bool added = true;

	lock(&mem_region_lock);
	region = new_region(name, start, len, NULL, type);
	assert(region);

	if (!mem_region_init_done)
		list_add(&early_reserves, &region->list);
	else
		added = add_region(region);

	assert(added);
	unlock(&mem_region_lock);
}

void mem_reserve_fw(const char *name, uint64_t start, uint64_t len)
{
	mem_reserve(REGION_FW_RESERVED, name, start, len);
}

void mem_reserve_hwbuf(const char *name, uint64_t start, uint64_t len)
{
	mem_reserve(REGION_RESERVED, name, start, len);
}

static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id)
{
	size_t i;

	for (i = 0; i < num; i++)
		if (be32_to_cpu(ids[i]) == chip_id)
			return true;

	return false;
}
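/*
 * Node-local allocation sketch: __local_alloc() below makes two passes
 * over the region list. The first pass only considers regions whose
 * device-tree node carries an "ibm,chip-id" property matching the
 * requested chip (compared via matches_chip_id() above); if nothing
 * local can satisfy the request, a second pass accepts any allocatable
 * region. The normal skiboot heap is skipped in both passes.
 */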
void *__local_alloc(unsigned int chip_id, size_t size, size_t align,
		    const char *location)
{
	struct mem_region *region;
	void *p = NULL;
	bool use_local = true;

	lock(&mem_region_lock);

restart:
	list_for_each(&regions, region, list) {
		const struct dt_property *prop;
		const __be32 *ids;

		if (!(region->type == REGION_SKIBOOT_HEAP ||
		      region->type == REGION_MEMORY))
			continue;

		/* Don't allocate from normal heap. */
		if (region == &skiboot_heap)
			continue;

		/* First pass, only match node local regions */
		if (use_local) {
			if (!region->node)
				continue;
			prop = dt_find_property(region->node, "ibm,chip-id");
			ids = (const __be32 *)prop->prop;
			if (!matches_chip_id(ids, prop->len / sizeof(u32),
					     chip_id))
				continue;
		}

		/* Second pass, match anything */
		lock(&region->free_list_lock);
		p = mem_alloc(region, size, align, location);
		unlock(&region->free_list_lock);
		if (p)
			break;
	}

	/*
	 * If we can't allocate the memory block from the expected
	 * node, we bail to any one that can accommodate our request.
	 */
	if (!p && use_local) {
		use_local = false;
		goto restart;
	}

	unlock(&mem_region_lock);

	return p;
}

struct mem_region *find_mem_region(const char *name)
{
	struct mem_region *region;

	list_for_each(&regions, region, list) {
		if (streq(region->name, name))
			return region;
	}
	return NULL;
}

bool mem_range_is_reserved(uint64_t start, uint64_t size)
{
	uint64_t end = start + size;
	struct mem_region *region;
	struct list_head *search;

	/* We may have the range covered by a number of regions, which could
	 * appear in any order. So, we look for a region that covers the
	 * start address, and bump start up to the end of that region.
	 *
	 * We repeat until we've either bumped past the end of the range,
	 * or we didn't find a matching region.
	 *
	 * This has a worst-case of O(n^2), but n is well bounded by the
	 * small number of reservations.
	 */

	if (!mem_region_init_done)
		search = &early_reserves;
	else
		search = &regions;

	for (;;) {
		bool found = false;

		list_for_each(search, region, list) {
			if (!region_is_reserved(region))
				continue;

			/* does this region overlap the start address, and
			 * have a non-zero size? */
			if (region->start <= start &&
			    region->start + region->len > start &&
			    region->len) {
				start = region->start + region->len;
				found = true;
			}
		}

		/* 'end' is the first byte outside of the range */
		if (start >= end)
			return true;

		if (!found)
			break;
	}

	return false;
}

static void mem_region_parse_reserved_properties(void)
{
	const struct dt_property *names, *ranges;
	struct mem_region *region;

	prlog(PR_DEBUG, "MEM: parsing reserved memory from "
	      "reserved-names/-ranges properties\n");

	names = dt_find_property(dt_root, "reserved-names");
	ranges = dt_find_property(dt_root, "reserved-ranges");
	if (names && ranges) {
		const uint64_t *range;
		int n, len;

		range = (const void *)ranges->prop;

		for (n = 0; n < names->len; n += len, range += 2) {
			char *name;

			len = strlen(names->prop + n) + 1;
			name = strdup(names->prop + n);

			region = new_region(name,
					    dt_get_number(range, 2),
					    dt_get_number(range + 1, 2),
					    NULL, REGION_FW_RESERVED);
			if (!add_region(region)) {
				prerror("Couldn't add mem_region %s\n", name);
				abort();
			}
		}
	} else if (names || ranges) {
		prerror("Invalid properties: reserved-names=%p "
			"with reserved-ranges=%p\n",
			names, ranges);
		abort();
	} else {
		return;
	}
}
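/*
 * Illustrative device-tree input (hypothetical names and addresses):
 * the parser above consumes the flat reserved-names/reserved-ranges
 * pair, while the parser below walks /reserved-memory style nodes:
 *
 *	reserved-names = "hypothetical-code-image", "hypothetical-data";
 *	reserved-ranges = <0x1f 0x00000000 0x0 0x100000
 *			   0x1f 0x00100000 0x0 0x200000>;
 *
 *	reserved-memory {
 *		hypothetical-buffer@1f00000000 {
 *			reg = <0x1f 0x00000000 0x0 0x100000>;
 *			no-map;
 *		};
 *	};
 */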
static bool mem_region_parse_reserved_nodes(const char *path)
{
	struct dt_node *parent, *node;

	parent = dt_find_by_path(dt_root, path);
	if (!parent)
		return false;

	prlog(PR_INFO, "MEM: parsing reserved memory from node %s\n", path);

	dt_for_each_child(parent, node) {
		const struct dt_property *reg;
		struct mem_region *region;
		int type;

		reg = dt_find_property(node, "reg");
		if (!reg) {
			char *nodepath = dt_get_path(node);
			prerror("node %s has no reg property, ignoring\n",
				nodepath);
			free(nodepath);
			continue;
		}

		if (dt_has_node_property(node, "no-map", NULL))
			type = REGION_RESERVED;
		else
			type = REGION_FW_RESERVED;

		region = new_region(strdup(node->name),
				    dt_get_number(reg->prop, 2),
				    dt_get_number(reg->prop + sizeof(u64), 2),
				    node, type);
		if (!add_region(region)) {
			char *nodepath = dt_get_path(node);
			prerror("node %s failed to add_region()\n", nodepath);
			free(nodepath);
		}
	}

	return true;
}

/* Trawl through device tree, create memory regions from nodes. */
void mem_region_init(void)
{
	struct mem_region *region, *next;
	struct dt_node *i;
	bool rc;

	/* Ensure we have no collision between skiboot core and our heap */
	extern char _end[];
	BUILD_ASSERT(HEAP_BASE >= (uint64_t)_end);

	/*
	 * Add associativity properties outside of the lock
	 * to avoid recursive locking caused by allocations
	 * done by add_chip_dev_associativity()
	 */
	dt_for_each_node(dt_root, i) {
		if (!dt_has_node_property(i, "device_type", "memory"))
			continue;

		/* Add associativity properties */
		add_chip_dev_associativity(i);
	}

	/* Add each memory node. */
	dt_for_each_node(dt_root, i) {
		uint64_t start, len;
		char *rname;
#define NODE_REGION_PREFIX "ibm,firmware-allocs-"

		if (!dt_has_node_property(i, "device_type", "memory"))
			continue;
		rname = zalloc(strlen(i->name) +
			       strlen(NODE_REGION_PREFIX) + 1);
		assert(rname);
		strcat(rname, NODE_REGION_PREFIX);
		strcat(rname, i->name);
		start = dt_get_address(i, 0, &len);
		lock(&mem_region_lock);
		region = new_region(rname, start, len, i, REGION_MEMORY);
		if (!region) {
			prerror("MEM: Could not add mem region %s!\n",
				i->name);
			abort();
		}
		list_add(&regions, &region->list);
		if ((start + len) > top_of_ram)
			top_of_ram = start + len;
		unlock(&mem_region_lock);
	}

	/*
	 * This is called after we know the maximum PIR of all CPUs,
	 * so we can dynamically set the stack length.
	 */
	skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;

	lock(&mem_region_lock);

	/* Now carve out our own reserved areas. */
	if (!add_region(&skiboot_os_reserve) ||
	    !add_region(&skiboot_code_and_text) ||
	    !add_region(&skiboot_heap) ||
	    !add_region(&skiboot_after_heap) ||
	    !add_region(&skiboot_cpu_stacks)) {
		prerror("Out of memory adding skiboot reserved areas\n");
		abort();
	}

	if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
		if (!add_region(&skiboot_mambo_kernel) ||
		    !add_region(&skiboot_mambo_initramfs)) {
			prerror("Out of memory adding mambo payload\n");
			abort();
		}
	}

	/* Add reserved ranges from HDAT */
	list_for_each_safe(&early_reserves, region, next, list) {
		bool added;

		list_del(&region->list);
		added = add_region(region);
		assert(added);
	}

	/* Add reserved ranges from the DT */
	rc = mem_region_parse_reserved_nodes("/reserved-memory");
	if (!rc)
		rc = mem_region_parse_reserved_nodes(
				"/ibm,hostboot/reserved-memory");
	if (!rc)
		mem_region_parse_reserved_properties();

	mem_region_init_done = true;
	unlock(&mem_region_lock);
}

static uint64_t allocated_length(const struct mem_region *r)
{
	struct free_hdr *f, *last = NULL;

	/* No allocations at all? */
	if (r->free_list.n.next == NULL)
		return 0;

	/* Find last free block. */
	list_for_each(&r->free_list, f, list)
		if (f > last)
			last = f;

	/* No free blocks? */
	if (!last)
		return r->len;

	/* Last free block isn't at end? */
	if (next_hdr(r, &last->hdr))
		return r->len;

	return (unsigned long)last - r->start;
}
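/*
 * Worked example for allocated_length() (illustrative): a 1MB region
 * whose final free block starts 64kB in has an allocated length of
 * 64kB; everything from that block onwards can be released. If the
 * highest free block is not the last block in the region, or there are
 * no free blocks at all, the whole region counts as used.
 */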
/* Separate out allocated sections into their own region. */
void mem_region_release_unused(void)
{
	struct mem_region *r;

	lock(&mem_region_lock);
	assert(!mem_regions_finalised);

	prlog(PR_INFO, "Releasing unused memory:\n");
	list_for_each(&regions, r, list) {
		uint64_t used_len;

		/* If it's not allocatable, ignore it. */
		if (!(r->type == REGION_SKIBOOT_HEAP ||
		      r->type == REGION_MEMORY))
			continue;

		used_len = allocated_length(r);

		prlog(PR_INFO, "    %s: %llu/%llu used\n",
		      r->name, (long long)used_len, (long long)r->len);

		/* We keep the skiboot heap. */
		if (r == &skiboot_heap)
			continue;

		/* Nothing used? Whole thing is for Linux. */
		if (used_len == 0)
			r->type = REGION_OS;
		/* Partially used? Split region. */
		else if (used_len != r->len) {
			struct mem_region *for_linux;
			struct free_hdr *last = region_start(r) + used_len;

			/* Remove the final free block. */
			list_del_from(&r->free_list, &last->list);

			for_linux = split_region(r, r->start + used_len,
						 REGION_OS);
			if (!for_linux) {
				prerror("OOM splitting mem node %s for linux\n",
					r->name);
				abort();
			}
			list_add(&regions, &for_linux->list);
		}
	}
	unlock(&mem_region_lock);
}

static void mem_clear_range(uint64_t s, uint64_t e)
{
	uint64_t res_start, res_end;

	/* Skip exception vectors */
	if (s < EXCEPTION_VECTORS_END)
		s = EXCEPTION_VECTORS_END;

	/* Skip kernel preload area */
	res_start = (uint64_t)KERNEL_LOAD_BASE;
	res_end = res_start + KERNEL_LOAD_SIZE;

	if (s >= res_start && s < res_end)
		s = res_end;
	if (e > res_start && e <= res_end)
		e = res_start;
	if (e <= s)
		return;
	if (s < res_start && e > res_end) {
		mem_clear_range(s, res_start);
		mem_clear_range(res_end, e);
		return;
	}

	/* Skip initramfs preload area */
	res_start = (uint64_t)INITRAMFS_LOAD_BASE;
	res_end = res_start + INITRAMFS_LOAD_SIZE;

	if (s >= res_start && s < res_end)
		s = res_end;
	if (e > res_start && e <= res_end)
		e = res_start;
	if (e <= s)
		return;
	if (s < res_start && e > res_end) {
		mem_clear_range(s, res_start);
		mem_clear_range(res_end, e);
		return;
	}

	prlog(PR_DEBUG, "Clearing region %llx-%llx\n",
	      (long long)s, (long long)e);
	memset((void *)s, 0, e - s);
}

struct mem_region_clear_job_args {
	char *job_name;
	uint64_t s, e;
};

static void mem_region_clear_job(void *data)
{
	struct mem_region_clear_job_args *arg =
		(struct mem_region_clear_job_args *)data;
	mem_clear_range(arg->s, arg->e);
}

#define MEM_REGION_CLEAR_JOB_SIZE (16ULL * (1 << 30))

static struct cpu_job **mem_clear_jobs;
static struct mem_region_clear_job_args *mem_clear_job_args;
static int mem_clear_njobs = 0;
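/*
 * Job sizing example (illustrative): with MEM_REGION_CLEAR_JOB_SIZE at
 * 16GB, a 40GB REGION_OS region is cleared as two 16GB jobs carved off
 * the top plus one final 8GB job for the remainder, each queued on the
 * region's home chip where possible so the memset runs node-local.
 */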
void start_mem_region_clear_unused(void)
{
	struct mem_region *r;
	uint64_t s, l;
	uint64_t total = 0;
	uint32_t chip_id;
	char *path;
	int i;
	struct cpu_job **jobs;
	struct mem_region_clear_job_args *job_args;

	lock(&mem_region_lock);
	assert(mem_regions_finalised);

	mem_clear_njobs = 0;

	list_for_each(&regions, r, list) {
		if (!(r->type == REGION_OS))
			continue;
		mem_clear_njobs++;
		/* One job per 16GB */
		mem_clear_njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE;
	}

	jobs = malloc(mem_clear_njobs * sizeof(struct cpu_job *));
	job_args = malloc(mem_clear_njobs *
			  sizeof(struct mem_region_clear_job_args));
	mem_clear_jobs = jobs;
	mem_clear_job_args = job_args;

	prlog(PR_NOTICE, "Clearing unused memory:\n");
	i = 0;
	list_for_each(&regions, r, list) {
		/* If it's not unused, ignore it. */
		if (!(r->type == REGION_OS))
			continue;

		assert(r != &skiboot_heap);

		s = r->start;
		l = r->len;
		while (l > MEM_REGION_CLEAR_JOB_SIZE) {
			job_args[i].s = s + l - MEM_REGION_CLEAR_JOB_SIZE;
			job_args[i].e = s + l;
			l -= MEM_REGION_CLEAR_JOB_SIZE;
			job_args[i].job_name = malloc(sizeof(char) * 100);
			total += MEM_REGION_CLEAR_JOB_SIZE;
			chip_id = __dt_get_chip_id(r->node);
			if (chip_id == -1)
				chip_id = 0;
			path = dt_get_path(r->node);
			snprintf(job_args[i].job_name, 100,
				 "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d",
				 r->name, path,
				 job_args[i].s,
				 (job_args[i].e - job_args[i].s),
				 chip_id);
			free(path);
			jobs[i] = cpu_queue_job_on_node(chip_id,
							job_args[i].job_name,
							mem_region_clear_job,
							&job_args[i]);
			if (!jobs[i])
				jobs[i] = cpu_queue_job(NULL,
							job_args[i].job_name,
							mem_region_clear_job,
							&job_args[i]);
			assert(jobs[i]);
			i++;
		}
		job_args[i].s = s;
		job_args[i].e = s + l;
		job_args[i].job_name = malloc(sizeof(char) * 100);
		total += l;
		chip_id = __dt_get_chip_id(r->node);
		if (chip_id == -1)
			chip_id = 0;
		path = dt_get_path(r->node);
		snprintf(job_args[i].job_name, 100,
			 "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d",
			 r->name, path,
			 job_args[i].s,
			 (job_args[i].e - job_args[i].s),
			 chip_id);
		free(path);
		jobs[i] = cpu_queue_job_on_node(chip_id,
						job_args[i].job_name,
						mem_region_clear_job,
						&job_args[i]);
		/* As above, fall back to any node if the target is full. */
		if (!jobs[i])
			jobs[i] = cpu_queue_job(NULL, job_args[i].job_name,
						mem_region_clear_job,
						&job_args[i]);
		assert(jobs[i]);
		i++;
	}
	unlock(&mem_region_lock);
	cpu_process_local_jobs();
}

void wait_mem_region_clear_unused(void)
{
	uint64_t l;
	uint64_t total = 0;
	int i;

	for (i = 0; i < mem_clear_njobs; i++)
		total += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);

	l = 0;
	for (i = 0; i < mem_clear_njobs; i++) {
		cpu_wait_job(mem_clear_jobs[i], true);
		l += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);
		printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n",
		       l >> 30, total >> 30);
		free(mem_clear_job_args[i].job_name);
	}
	free(mem_clear_jobs);
	free(mem_clear_job_args);
}

static void mem_region_add_dt_reserved_node(struct dt_node *parent,
					    struct mem_region *region)
{
	char *name, *p;

	/* If a reserved region was established before skiboot, it may be
	 * referenced by a device-tree node with extra data. In that case,
	 * copy the node to /reserved-memory/, unless it's already there.
	 *
	 * We update region->node to the new copy here, as the prd code may
	 * update regions' device-tree nodes, and we want those updates to
	 * apply to the nodes in /reserved-memory/.
	 */
	if (region->type == REGION_FW_RESERVED && region->node) {
		if (region->node->parent != parent)
			region->node = dt_copy(region->node, parent);
		return;
	}

	name = strdup(region->name);
	assert(name);

	/* remove any cell addresses in the region name; we have our own cell
	 * addresses here */
	p = strchr(name, '@');
	if (p)
		*p = '\0';

	region->node = dt_new_addr(parent, name, region->start);
	assert(region->node);
	dt_add_property_u64s(region->node, "reg", region->start, region->len);

	/*
	 * This memory is used by hardware and may need special handling. Ask
	 * the host kernel not to map it by default.
	 */
	if (region->type == REGION_RESERVED)
		dt_add_property(region->node, "no-map", NULL, 0);

	free(name);
}
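/*
 * Resulting tree shape (illustrative, hypothetical region): a
 * REGION_RESERVED region named "hypothetical-buffer@1000000" with a
 * 64kB length ends up as:
 *
 *	reserved-memory {
 *		hypothetical-buffer@1000000 {
 *			reg = <0x0 0x1000000 0x0 0x10000>;
 *			no-map;
 *		};
 *	};
 *
 * plus matching entries in the legacy /reserved-names and
 * /reserved-ranges properties built below.
 */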
void mem_region_add_dt_reserved(void)
{
	int names_len, ranges_len, len;
	const struct dt_property *prop;
	struct mem_region *region;
	void *names, *ranges;
	struct dt_node *node;
	uint64_t *range;
	char *name;

	names_len = 0;
	ranges_len = 0;

	/* Finalise the region list, so we know that the regions list won't be
	 * altered after this point. The regions' free lists may change after
	 * we drop the lock, but we don't access those. */
	lock(&mem_region_lock);
	mem_regions_finalised = true;

	/* establish top-level reservation node */
	node = dt_find_by_path(dt_root, "reserved-memory");
	if (!node) {
		node = dt_new(dt_root, "reserved-memory");
		dt_add_property_cells(node, "#address-cells", 2);
		dt_add_property_cells(node, "#size-cells", 2);
		dt_add_property(node, "ranges", NULL, 0);
	}

	prlog(PR_INFO, "Reserved regions:\n");

	/* First pass, create /reserved-memory/ nodes for each reservation,
	 * and calculate the length for the /reserved-names and
	 * /reserved-ranges properties */
	list_for_each(&regions, region, list) {
		if (!region_is_reservable(region))
			continue;

		prlog(PR_INFO, "  0x%012llx..%012llx : %s\n",
		      (long long)region->start,
		      (long long)(region->start + region->len - 1),
		      region->name);

		mem_region_add_dt_reserved_node(node, region);

		/* calculate the size of the properties populated later */
		names_len += strlen(region->node->name) + 1;
		ranges_len += 2 * sizeof(uint64_t);
	}

	name = names = malloc(names_len);
	range = ranges = malloc(ranges_len);

	/* Second pass: populate the old-style reserved-names and
	 * reserved-ranges arrays based on the node data */
	list_for_each(&regions, region, list) {
		if (!region_is_reservable(region))
			continue;

		len = strlen(region->node->name) + 1;
		memcpy(name, region->node->name, len);
		name += len;

		range[0] = cpu_to_fdt64(region->start);
		range[1] = cpu_to_fdt64(region->len);
		range += 2;
	}
	unlock(&mem_region_lock);

	prop = dt_find_property(dt_root, "reserved-names");
	if (prop)
		dt_del_property(dt_root, (struct dt_property *)prop);

	prop = dt_find_property(dt_root, "reserved-ranges");
	if (prop)
		dt_del_property(dt_root, (struct dt_property *)prop);

	dt_add_property(dt_root, "reserved-names", names, names_len);
	dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len);

	free(names);
	free(ranges);
}

struct mem_region *mem_region_next(struct mem_region *region)
{
	struct list_node *node;

	assert(lock_held_by_me(&mem_region_lock));

	node = region ? &region->list : &regions.n;

	if (node->next == &regions.n)
		return NULL;

	return list_entry(node->next, struct mem_region, list);
}