diff options
Diffstat (limited to 'mm/kasan/common.c')
-rw-r--r-- | mm/kasan/common.c | 290 |
1 files changed, 280 insertions, 10 deletions
diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2277b82902d8..6aa51723b92b 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,6 +36,9 @@ #include <linux/bug.h> #include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + #include "kasan.h" #include "../slab.h" @@ -107,6 +110,7 @@ void *memset(void *addr, int c, size_t len) return __memset(addr, c, len); } +#ifdef __HAVE_ARCH_MEMMOVE #undef memmove void *memmove(void *dest, const void *src, size_t len) { @@ -115,6 +119,7 @@ void *memmove(void *dest, const void *src, size_t len) return __memmove(dest, src, len); } +#endif #undef memcpy void *memcpy(void *dest, const void *src, size_t len) @@ -304,7 +309,6 @@ size_t kasan_metadata_size(struct kmem_cache *cache) struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, const void *object) { - BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32); return (void *)object + cache->kasan_info.alloc_meta_offset; } @@ -315,14 +319,31 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache, return (void *)object + cache->kasan_info.free_meta_offset; } + +static void kasan_set_free_info(struct kmem_cache *cache, + void *object, u8 tag) +{ + struct kasan_alloc_meta *alloc_meta; + u8 idx = 0; + + alloc_meta = get_alloc_info(cache, object); + +#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY + idx = alloc_meta->free_track_idx; + alloc_meta->free_pointer_tag[idx] = tag; + alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS; +#endif + + set_track(&alloc_meta->free_track[idx], GFP_NOWAIT); +} + void kasan_poison_slab(struct page *page) { unsigned long i; - for (i = 0; i < (1 << compound_order(page)); i++) + for (i = 0; i < compound_nr(page); i++) page_kasan_tag_reset(page + i); - kasan_poison_shadow(page_address(page), - PAGE_SIZE << compound_order(page), + kasan_poison_shadow(page_address(page), page_size(page), KASAN_KMALLOC_REDZONE); } @@ -407,8 +428,14 @@ static inline bool shadow_invalid(u8 tag, s8 shadow_byte) if (IS_ENABLED(CONFIG_KASAN_GENERIC)) return shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE; - else - return tag != (u8)shadow_byte; + + /* else CONFIG_KASAN_SW_TAGS: */ + if ((u8)shadow_byte == KASAN_TAG_INVALID) + return true; + if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte)) + return true; + + return false; } static bool __kasan_slab_free(struct kmem_cache *cache, void *object, @@ -446,7 +473,8 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object, unlikely(!(cache->flags & SLAB_KASAN))) return false; - set_track(&get_alloc_info(cache, object)->free_track, GFP_NOWAIT); + kasan_set_free_info(cache, object, tag); + quarantine_put(get_free_info(cache, object), cache); return IS_ENABLED(CONFIG_KASAN_GENERIC); @@ -518,7 +546,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, page = virt_to_page(ptr); redzone_start = round_up((unsigned long)(ptr + size), KASAN_SHADOW_SCALE_SIZE); - redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page)); + redzone_end = (unsigned long)ptr + page_size(page); kasan_unpoison_shadow(ptr, size); kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, @@ -554,8 +582,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip) kasan_report_invalid_free(ptr, ip); return; } - kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), - KASAN_FREE_PAGE); + kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE); } else { __kasan_slab_free(page->slab_cache, ptr, ip, false); } @@ -568,6 +595,7 @@ void kasan_kfree_large(void *ptr, unsigned long ip) /* The object will be poisoned by page_alloc. */ } +#ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size) { void *ret; @@ -603,6 +631,7 @@ void kasan_free_shadow(const struct vm_struct *vm) if (vm->flags & VM_KASAN) vfree(kasan_mem_to_shadow(vm->addr)); } +#endif extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); @@ -722,3 +751,244 @@ static int __init kasan_memhotplug_init(void) core_initcall(kasan_memhotplug_init); #endif + +#ifdef CONFIG_KASAN_VMALLOC +static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + pte_t pte; + + if (likely(!pte_none(*ptep))) + return 0; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); + pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(*ptep))) { + set_pte_at(&init_mm, addr, ptep, pte); + page = 0; + } + spin_unlock(&init_mm.page_table_lock); + if (page) + free_page(page); + return 0; +} + +int kasan_populate_vmalloc(unsigned long addr, unsigned long size) +{ + unsigned long shadow_start, shadow_end; + int ret; + + if (!is_vmalloc_or_module_addr((void *)addr)) + return 0; + + shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr); + shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size); + shadow_end = ALIGN(shadow_end, PAGE_SIZE); + + ret = apply_to_page_range(&init_mm, shadow_start, + shadow_end - shadow_start, + kasan_populate_vmalloc_pte, NULL); + if (ret) + return ret; + + flush_cache_vmap(shadow_start, shadow_end); + + /* + * We need to be careful about inter-cpu effects here. Consider: + * + * CPU#0 CPU#1 + * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ; + * p[99] = 1; + * + * With compiler instrumentation, that ends up looking like this: + * + * CPU#0 CPU#1 + * // vmalloc() allocates memory + * // let a = area->addr + * // we reach kasan_populate_vmalloc + * // and call kasan_unpoison_shadow: + * STORE shadow(a), unpoison_val + * ... + * STORE shadow(a+99), unpoison_val x = LOAD p + * // rest of vmalloc process <data dependency> + * STORE p, a LOAD shadow(x+99) + * + * If there is no barrier between the end of unpoisioning the shadow + * and the store of the result to p, the stores could be committed + * in a different order by CPU#0, and CPU#1 could erroneously observe + * poison in the shadow. + * + * We need some sort of barrier between the stores. + * + * In the vmalloc() case, this is provided by a smp_wmb() in + * clear_vm_uninitialized_flag(). In the per-cpu allocator and in + * get_vm_area() and friends, the caller gets shadow allocated but + * doesn't have any pages mapped into the virtual address space that + * has been reserved. Mapping those pages in will involve taking and + * releasing a page-table lock, which will provide the barrier. + */ + + return 0; +} + +/* + * Poison the shadow for a vmalloc region. Called as part of the + * freeing process at the time the region is freed. + */ +void kasan_poison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID); +} + +void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + kasan_unpoison_shadow(start, size); +} + +static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + + page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT); + + spin_lock(&init_mm.page_table_lock); + + if (likely(!pte_none(*ptep))) { + pte_clear(&init_mm, addr, ptep); + free_page(page); + } + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +/* + * Release the backing for the vmalloc region [start, end), which + * lies within the free region [free_region_start, free_region_end). + * + * This can be run lazily, long after the region was freed. It runs + * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap + * infrastructure. + * + * How does this work? + * ------------------- + * + * We have a region that is page aligned, labelled as A. + * That might not map onto the shadow in a way that is page-aligned: + * + * start end + * v v + * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |??AAAAAA|AAAAAAAA|AA??????| < shadow + * (1) (2) (3) + * + * First we align the start upwards and the end downwards, so that the + * shadow of the region aligns with shadow page boundaries. In the + * example, this gives us the shadow page (2). This is the shadow entirely + * covered by this allocation. + * + * Then we have the tricky bits. We want to know if we can free the + * partially covered shadow pages - (1) and (3) in the example. For this, + * we are given the start and end of the free region that contains this + * allocation. Extending our previous example, we could have: + * + * free_region_start free_region_end + * | start end | + * v v v v + * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow + * (1) (2) (3) + * + * Once again, we align the start of the free region up, and the end of + * the free region down so that the shadow is page aligned. So we can free + * page (1) - we know no allocation currently uses anything in that page, + * because all of it is in the vmalloc free region. But we cannot free + * page (3), because we can't be sure that the rest of it is unused. + * + * We only consider pages that contain part of the original region for + * freeing: we don't try to free other pages from the free region or we'd + * end up trying to free huge chunks of virtual address space. + * + * Concurrency + * ----------- + * + * How do we know that we're not freeing a page that is simultaneously + * being used for a fresh allocation in kasan_populate_vmalloc(_pte)? + * + * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running + * at the same time. While we run under free_vmap_area_lock, the population + * code does not. + * + * free_vmap_area_lock instead operates to ensure that the larger range + * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and + * the per-cpu region-finding algorithm both run under free_vmap_area_lock, + * no space identified as free will become used while we are running. This + * means that so long as we are careful with alignment and only free shadow + * pages entirely covered by the free region, we will not run in to any + * trouble - any simultaneous allocations will be for disjoint regions. + */ +void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) +{ + void *shadow_start, *shadow_end; + unsigned long region_start, region_end; + unsigned long size; + + region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + free_region_start = ALIGN(free_region_start, + PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + if (start != region_start && + free_region_start < region_start) + region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + + free_region_end = ALIGN_DOWN(free_region_end, + PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + if (end != region_end && + free_region_end > region_end) + region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + + shadow_start = kasan_mem_to_shadow((void *)region_start); + shadow_end = kasan_mem_to_shadow((void *)region_end); + + if (shadow_end > shadow_start) { + size = shadow_end - shadow_start; + apply_to_existing_page_range(&init_mm, + (unsigned long)shadow_start, + size, kasan_depopulate_vmalloc_pte, + NULL); + flush_tlb_kernel_range((unsigned long)shadow_start, + (unsigned long)shadow_end); + } +} +#endif |