From 49c3df6aaa6a51071fc135273d1a2515d019099f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86: Move swsusp __pa() dependent code to arch portion o __pa() should be used only on kernel linearly mapped virtual addresses and not on kernel text and data addresses. o Hibernation code needs to determine the physical address associated with kernel symbol to mark a section boundary which contains pages which don't have to be saved and restored during hibernate/resume operation. o Move this piece of code in arch dependent section. So that architectures which don't have kernel text/data mapped into kernel linearly mapped region can come up with their own ways of determining physical addresses associated with a kernel text. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/power.h | 5 ++--- kernel/power/snapshot.c | 11 ----------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf4..1c6eef8df4ad 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -23,6 +23,8 @@ static inline int pm_suspend_disk(void) } #endif +extern int pfn_is_nosave(unsigned long); + extern struct mutex pm_mutex; #define power_attr(_name) \ @@ -37,9 +39,6 @@ static struct subsys_attribute _name##_attr = { \ extern struct subsystem power_subsys; -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; extern int in_suspend; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fc53ad068128..704c25a3ffec 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -650,17 +650,6 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } static inline unsigned int count_highmem_pages(void) { return 0; } #endif /* CONFIG_HIGHMEM */ -/** - * pfn_is_nosave - check if 
given pfn is in the 'nosave' section - */ - -static inline int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - /** * saveable - Determine whether a non-highmem page should be included in * the suspend image. -- cgit v1.2.1 From 1b29c1643c0d82512477ccd97dc290198fe23e22 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 May 2007 19:27:07 +0200 Subject: [PATCH] x86-64: do not use virt_to_page on kernel data address o virt_to_page() call should be used on kernel linear addresses and not on kernel text and data addresses. Swsusp code uses it on kernel data (statically allocated swsusp_header). o Allocate swsusp_header dynamically so that virt_to_page() can be used safely. o I am changing this because in next few patches, __pa() on x86_64 will no longer support kernel text and data addresses and hibernation breaks. 
Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- kernel/power/swap.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3581f8f86acd..b18c155cbb60 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,12 +33,14 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" -static struct swsusp_header { +struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; sector_t image; char orig_sig[10]; char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; /* * General things @@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start) { int error; - bio_read_page(swsusp_resume_block, &swsusp_header, NULL); - if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || - !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { - memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); - memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.image = start; + bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig,SWSUSP_SIG, 10); + swsusp_header->image = start; error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; @@ -564,7 +566,7 @@ int swsusp_read(void) if (error < PAGE_SIZE) return error < 0 ? 
error : -EFAULT; header = (struct swsusp_info *)data_of(snapshot); - error = get_swap_reader(&handle, swsusp_header.image); + error = get_swap_reader(&handle, swsusp_header->image); if (!error) error = swap_read_page(&handle, header, NULL); if (!error) @@ -591,17 +593,17 @@ int swsusp_check(void) resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); - memset(&swsusp_header, 0, sizeof(swsusp_header)); + memset(swsusp_header, 0, sizeof(PAGE_SIZE)); error = bio_read_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); if (error) return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { return -EINVAL; } @@ -632,3 +634,13 @@ void swsusp_close(void) blkdev_put(resume_bdev); } + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); -- cgit v1.2.1 From 823bccfc4002296ba88c3ad0f049e1abd8108d30 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 13 Apr 2007 13:15:19 -0700 Subject: remove "struct subsystem" as it is no longer needed We need to work on cleaning up the relationship between kobjects, ksets and ktypes. The removal of 'struct subsystem' is the first step of this, especially as it is not really needed at all. Thanks to Kay for fixing the bugs in this patch. 
Signed-off-by: Greg Kroah-Hartman --- kernel/power/disk.c | 14 +++++++------- kernel/power/main.c | 10 +++++----- kernel/power/power.h | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 02e4fb69111a..8df51c23bba4 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -322,13 +322,13 @@ static const char * const pm_disk_modes[] = { * supports it (as determined from pm_ops->pm_disk_mode). */ -static ssize_t disk_show(struct subsystem * subsys, char * buf) +static ssize_t disk_show(struct kset *kset, char *buf) { return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); } -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +static ssize_t disk_store(struct kset *kset, const char *buf, size_t n) { int error = 0; int i; @@ -373,13 +373,13 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) power_attr(disk); -static ssize_t resume_show(struct subsystem * subsys, char *buf) +static ssize_t resume_show(struct kset *kset, char *buf) { return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } -static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) +static ssize_t resume_store(struct kset *kset, const char *buf, size_t n) { unsigned int maj, min; dev_t res; @@ -405,12 +405,12 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) power_attr(resume); -static ssize_t image_size_show(struct subsystem * subsys, char *buf) +static ssize_t image_size_show(struct kset *kset, char *buf) { return sprintf(buf, "%lu\n", image_size); } -static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n) { unsigned long size; @@ -439,7 +439,7 @@ static struct attribute_group attr_group = { static int __init pm_disk_init(void) { - return 
sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + return sysfs_create_group(&power_subsys.kobj, &attr_group); } core_initcall(pm_disk_init); diff --git a/kernel/power/main.c b/kernel/power/main.c index 72419a3b1beb..b21c2a56f960 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -285,7 +285,7 @@ decl_subsys(power,NULL,NULL); * proper enumerated value, and initiates a suspend transition. */ -static ssize_t state_show(struct subsystem * subsys, char * buf) +static ssize_t state_show(struct kset *kset, char *buf) { int i; char * s = buf; @@ -298,7 +298,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf) return (s - buf); } -static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t state_store(struct kset *kset, const char *buf, size_t n) { suspend_state_t state = PM_SUSPEND_STANDBY; const char * const *s; @@ -325,13 +325,13 @@ power_attr(state); #ifdef CONFIG_PM_TRACE int pm_trace_enabled; -static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +static ssize_t pm_trace_show(struct kset *kset, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t -pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +pm_trace_store(struct kset *kset, const char *buf, size_t n) { int val; @@ -365,7 +365,7 @@ static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) - error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + error = sysfs_create_group(&power_subsys.kobj,&attr_group); return error; } diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf4..5f842c3efc4b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -35,7 +35,7 @@ static struct subsys_attribute _name##_attr = { \ .store = _name##_store, \ } -extern struct subsystem power_subsys; +extern struct kset power_subsys; /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; -- cgit v1.2.1 
From 433ecb4ab312f873870b67ee374502e84f6dcf92 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 May 2007 14:50:40 -0700 Subject: fix refrigerator() vs thaw_process() race refrigerator() can miss a wakeup, "wait event" loop needs a proper memory ordering. Signed-off-by: Oleg Nesterov Acked-by: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/process.c b/kernel/power/process.c index 6d566bf7085c..0eb5c420e8ed 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -47,8 +47,10 @@ void refrigerator(void) recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(&current->sighand->siglock); - while (frozen(current)) { - current->state = TASK_UNINTERRUPTIBLE; + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!frozen(current)) + break; schedule(); } pr_debug("%s left refrigerator\n", current->comm); -- cgit v1.2.1 From 7be9823491ecbaf9700d7d3502cb4b4dd0ed868a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:42 -0700 Subject: swsusp: use inline functions for changing page flags Replace direct invocations of SetPageNosave(), SetPageNosaveFree() etc. with calls to inline functions that can be changed in subsequent patches without modifying the code calling them. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/snapshot.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 704c25a3ffec..48fc7a35571b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -67,15 +67,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) res = (void *)get_zeroed_page(gfp_mask); if (safe_needed) - while (res && PageNosaveFree(virt_to_page(res))) { + while (res && swsusp_page_is_free(virt_to_page(res))) { /* The page is unsafe, mark it for swsusp_free() */ - SetPageNosave(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); allocated_unsafe_pages++; res = (void *)get_zeroed_page(gfp_mask); } if (res) { - SetPageNosave(virt_to_page(res)); - SetPageNosaveFree(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); + swsusp_set_page_free(virt_to_page(res)); } return res; } @@ -91,8 +91,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask) page = alloc_page(gfp_mask); if (page) { - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } return page; } @@ -110,9 +110,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free) page = virt_to_page(addr); - ClearPageNosave(page); + swsusp_unset_page_forbidden(page); if (clear_nosave_free) - ClearPageNosaveFree(page); + swsusp_unset_page_free(page); __free_page(page); } @@ -615,7 +615,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) BUG_ON(!PageHighMem(page)); - if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || + PageReserved(page)) return NULL; return page; @@ -670,7 +671,7 @@ static struct page *saveable_page(unsigned long pfn) BUG_ON(PageHighMem(page)); - if 
(PageNosave(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; if (PageReserved(page) && pfn_is_nosave(pfn)) @@ -810,9 +811,10 @@ void swsusp_free(void) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - if (PageNosave(page) && PageNosaveFree(page)) { - ClearPageNosave(page); - ClearPageNosaveFree(page); + if (swsusp_page_is_forbidden(page) && + swsusp_page_is_free(page)) { + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); __free_page(page); } } @@ -1135,7 +1137,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) - ClearPageNosaveFree(pfn_to_page(pfn)); + swsusp_unset_page_free(pfn_to_page(pfn)); } /* Mark pages that correspond to the "original" pfns as "unsafe" */ @@ -1144,7 +1146,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) pfn = memory_bm_next_pfn(bm); if (likely(pfn != BM_END_OF_MAP)) { if (likely(pfn_valid(pfn))) - SetPageNosaveFree(pfn_to_page(pfn)); + swsusp_set_page_free(pfn_to_page(pfn)); else return -EFAULT; } @@ -1310,14 +1312,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) struct page *page; page = alloc_page(__GFP_HIGHMEM); - if (!PageNosaveFree(page)) { + if (!swsusp_page_is_free(page)) { /* The page is "safe", set its bit the bitmap */ memory_bm_set_bit(bm, page_to_pfn(page)); safe_highmem_pages++; } /* Mark the page as allocated */ - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } memory_bm_position_reset(bm); safe_highmem_bm = bm; @@ -1349,7 +1351,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) struct highmem_pbe *pbe; void *kaddr; - if (PageNosave(page) && PageNosaveFree(page)) { + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { /* We have allocated the 
"original" page frame and we can * use it directly to store the loaded page. */ @@ -1511,14 +1513,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) error = -ENOMEM; goto Free; } - if (!PageNosaveFree(virt_to_page(lp))) { + if (!swsusp_page_is_free(virt_to_page(lp))) { /* The page is "safe", add it to the list */ lp->next = safe_pages_list; safe_pages_list = lp; } /* Mark the page as allocated */ - SetPageNosave(virt_to_page(lp)); - SetPageNosaveFree(virt_to_page(lp)); + swsusp_set_page_forbidden(virt_to_page(lp)); + swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } /* Free the reserved safe pages so that chain_alloc() can use them */ @@ -1547,7 +1549,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) if (PageHighMem(page)) return get_highmem_page_buffer(page, ca); - if (PageNosave(page) && PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ -- cgit v1.2.1 From 74dfd666de861c97d47bdbd892f6d21b801d0247 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:43 -0700 Subject: swsusp: do not use page flags Make swsusp use memory bitmaps instead of page flags for marking 'nosave' and free pages. This allows us to 'recycle' two page flags that can be used for other purposes. Also, the memory needed to store the bitmaps is allocated when necessary (ie. before the suspend) and freed after the resume which is more reasonable. The patch is designed to minimize the amount of changes and there are some nice simplifications and optimizations possible on top of it. I am going to implement them separately in the future. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 23 +++-- kernel/power/main.c | 2 + kernel/power/power.h | 2 + kernel/power/snapshot.c | 250 ++++++++++++++++++++++++++++++++++++++++++++---- kernel/power/user.c | 4 + 5 files changed, 259 insertions(+), 22 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 8df51c23bba4..403bc3722fee 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -139,14 +139,19 @@ int pm_suspend_disk(void) mdelay(5000); goto Thaw; } + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; + /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (error) - goto Thaw; + goto Finish; error = platform_prepare(); if (error) - goto Thaw; + goto Finish; suspend_console(); error = device_suspend(PMSG_FREEZE); @@ -181,7 +186,7 @@ int pm_suspend_disk(void) power_down(); else { swsusp_free(); - goto Thaw; + goto Finish; } } else { pr_debug("PM: Image restored successfully.\n"); @@ -194,6 +199,8 @@ int pm_suspend_disk(void) platform_finish(); device_resume(); resume_console(); + Finish: + free_basic_memory_bitmaps(); Thaw: unprepare_processes(); return error; @@ -239,13 +246,15 @@ static int software_resume(void) } pr_debug("PM: Checking swsusp image.\n"); - error = swsusp_check(); if (error) - goto Done; + goto Unlock; - pr_debug("PM: Preparing processes for restore.\n"); + error = create_basic_memory_bitmaps(); + if (error) + goto Unlock; + pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); if (error) { swsusp_close(); @@ -280,7 +289,9 @@ static int software_resume(void) printk(KERN_ERR "PM: Restore failed, recovering.\n"); unprepare_processes(); Done: + free_basic_memory_bitmaps(); /* For success case, the suspend path will release the lock */ + Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk 
failed.\n"); return 0; diff --git a/kernel/power/main.c b/kernel/power/main.c index b21c2a56f960..5a779270cdbc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -244,6 +244,7 @@ static int enter_state(suspend_state_t state) return error; } +#ifdef CONFIG_SOFTWARE_SUSPEND /* * This is main interface to the outside world. It needs to be * called from process context. @@ -252,6 +253,7 @@ int software_suspend(void) { return enter_state(PM_SUSPEND_DISK); } +#endif /** diff --git a/kernel/power/power.h b/kernel/power/power.h index 33bd94ceba32..1f8052bda0f7 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -48,6 +48,8 @@ extern sector_t swsusp_resume_block; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern int create_basic_memory_bitmaps(void); +extern void free_basic_memory_bitmaps(void); extern unsigned int count_data_pages(void); /** diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 48fc7a35571b..f66e4411795b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,10 @@ #include "power.h" +static int swsusp_page_is_free(struct page *); +static void swsusp_set_page_forbidden(struct page *); +static void swsusp_unset_page_forbidden(struct page *); + /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written @@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) * of type unsigned long each). It also contains the pfns that * correspond to the start and end of the represented memory area and * the number of bit chunks in the block. - * - * NOTE: Memory bitmaps are used for two types of operations only: - * "set a bit" and "find the next bit set". 
Moreover, the searching - * is always carried out after all of the "set a bit" operations - * on given bitmap. */ #define BM_END_OF_MAP (~0UL) @@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) } /** - * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds * to given pfn. The cur_zone_bm member of @bm and the cur_block member * of @bm->cur_zone_bm are updated. - * - * If the bit cannot be set, the function returns -EINVAL . */ -static int -memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) { struct zone_bitmap *zone_bm; struct bm_block *bb; @@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) /* We don't assume that the zones are sorted by pfns */ while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { zone_bm = zone_bm->next; - if (unlikely(!zone_bm)) - return -EINVAL; + + BUG_ON(!zone_bm); } bm->cur.zone_bm = zone_bm; } @@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) while (pfn >= bb->end_pfn) { bb = bb->next; - if (unlikely(!bb)) - return -EINVAL; + + BUG_ON(!bb); } zone_bm->cur_block = bb; pfn -= bb->start_pfn; - set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); - return 0; + *bit_nr = pfn % BM_BITS_PER_CHUNK; + *addr = bb->data + pfn / BM_BITS_PER_CHUNK; +} + +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + set_bit(bit, addr); +} + +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + clear_bit(bit, addr); +} + +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void 
*addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + return test_bit(bit, addr); } /* Two auxiliary functions for memory_bm_next_pfn */ @@ -563,6 +588,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit; } +/** + * This structure represents a range of page frames the contents of which + * should not be saved during the suspend. + */ + +struct nosave_region { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static LIST_HEAD(nosave_regions); + +/** + * register_nosave_region - register a range of page frames the contents + * of which should not be saved during the suspend (to be used in the early + * initialization code) + */ + +void __init +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) +{ + struct nosave_region *region; + + if (start_pfn >= end_pfn) + return; + + if (!list_empty(&nosave_regions)) { + /* Try to extend the previous region (they should be sorted) */ + region = list_entry(nosave_regions.prev, + struct nosave_region, list); + if (region->end_pfn == start_pfn) { + region->end_pfn = end_pfn; + goto Report; + } + } + /* This allocation cannot fail */ + region = alloc_bootmem_low(sizeof(struct nosave_region)); + region->start_pfn = start_pfn; + region->end_pfn = end_pfn; + list_add_tail(&region->list, &nosave_regions); + Report: + printk("swsusp: Registered nosave memory region: %016lx - %016lx\n", + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); +} + +/* + * Set bits in this map correspond to the page frames the contents of which + * should not be saved during the suspend. + */ +static struct memory_bitmap *forbidden_pages_map; + +/* Set bits in this map correspond to free page frames. 
*/ +static struct memory_bitmap *free_pages_map; + +/* + * Each page frame allocated for creating the image is marked by setting the + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously + */ + +void swsusp_set_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_set_bit(free_pages_map, page_to_pfn(page)); +} + +static int swsusp_page_is_free(struct page *page) +{ + return free_pages_map ? + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; +} + +void swsusp_unset_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); +} + +static void swsusp_set_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); +} + +int swsusp_page_is_forbidden(struct page *page) +{ + return forbidden_pages_map ? + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; +} + +static void swsusp_unset_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); +} + +/** + * mark_nosave_pages - set bits corresponding to the page frames the + * contents of which should not be saved in a given bitmap. + */ + +static void mark_nosave_pages(struct memory_bitmap *bm) +{ + struct nosave_region *region; + + if (list_empty(&nosave_regions)) + return; + + list_for_each_entry(region, &nosave_regions, list) { + unsigned long pfn; + + printk("swsusp: Marking nosave pages: %016lx - %016lx\n", + region->start_pfn << PAGE_SHIFT, + region->end_pfn << PAGE_SHIFT); + + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) + memory_bm_set_bit(bm, pfn); + } +} + +/** + * create_basic_memory_bitmaps - create bitmaps needed for marking page + * frames that should not be saved and free page frames. 
The pointers + * forbidden_pages_map and free_pages_map are only modified if everything + * goes well, because we don't want the bits to be used before both bitmaps + * are set up. + */ + +int create_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + int error = 0; + + BUG_ON(forbidden_pages_map || free_pages_map); + + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + if (!bm1) + return -ENOMEM; + + error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free_first_object; + + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + if (!bm2) + goto Free_first_bitmap; + + error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY); + if (error) + goto Free_second_object; + + forbidden_pages_map = bm1; + free_pages_map = bm2; + mark_nosave_pages(forbidden_pages_map); + + printk("swsusp: Basic memory bitmaps created\n"); + + return 0; + + Free_second_object: + kfree(bm2); + Free_first_bitmap: + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + Free_first_object: + kfree(bm1); + return -ENOMEM; +} + +/** + * free_basic_memory_bitmaps - free memory bitmaps allocated by + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary + * so that the bitmaps themselves are not referred to while they are being + * freed. 
+ */ + +void free_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + + BUG_ON(!(forbidden_pages_map && free_pages_map)); + + bm1 = forbidden_pages_map; + bm2 = free_pages_map; + forbidden_pages_map = NULL; + free_pages_map = NULL; + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + kfree(bm1); + memory_bm_free(bm2, PG_UNSAFE_CLEAR); + kfree(bm2); + + printk("swsusp: Basic memory bitmaps freed\n"); +} + /** * snapshot_additional_pages - estimate the number of additional pages * be needed for setting up the suspend image data structures for given diff --git a/kernel/power/user.c b/kernel/power/user.c index 7cf6713b2325..845acd84cb23 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -52,6 +52,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_ACCMODE) == O_RDWR) return -ENOSYS; + if(create_basic_memory_bitmaps()) + return -ENOMEM; + nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -77,6 +80,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) struct snapshot_data *data; swsusp_free(); + free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap, data->bitmap); free_bitmap(data->bitmap); -- cgit v1.2.1 From 1525a2ad76f991eba9755f75c9b6d4d97abad25e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:44 -0700 Subject: swsusp: fix error paths in snapshot_open We forget to increase device_available if there's an error in snapshot_open(), so the snapshot device cannot be open at all after snapshot_open() has returned an error. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/user.c b/kernel/power/user.c index 845acd84cb23..bd1771f7a64e 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -49,12 +49,14 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (!atomic_add_unless(&device_available, -1, 0)) return -EBUSY; - if ((filp->f_flags & O_ACCMODE) == O_RDWR) + if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&device_available); return -ENOSYS; - - if(create_basic_memory_bitmaps()) + } + if(create_basic_memory_bitmaps()) { + atomic_inc(&device_available); return -ENOMEM; - + } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; -- cgit v1.2.1 From 0709db6072c2e799eba1aa61bd19e0d7f38aa2cd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:45 -0700 Subject: swsusp: use GFP_KERNEL for creating basic data structures Make swsusp call create_basic_memory_bitmaps() before processes are frozen, so that GFP_KERNEL allocations can be made in it. Additionally, ensure that the swsusp's userland interface won't be used while either pm_suspend_disk() or software_resume() is being executed. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 37 ++++++++++++++++++++++++++----------- kernel/power/power.h | 3 +++ kernel/power/snapshot.c | 8 ++++---- kernel/power/user.c | 10 +++++----- 4 files changed, 38 insertions(+), 20 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 403bc3722fee..e518379b667a 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -130,28 +130,33 @@ int pm_suspend_disk(void) { int error; + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) + return -EBUSY; + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Exit; + error = prepare_processes(); if (error) - return error; + goto Finish; if (pm_disk_mode == PM_DISK_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); goto Thaw; } - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Thaw; /* Free memory before shutting down devices. 
*/ error = swsusp_shrink_memory(); if (error) - goto Finish; + goto Thaw; error = platform_prepare(); if (error) - goto Finish; + goto Thaw; suspend_console(); error = device_suspend(PMSG_FREEZE); @@ -186,7 +191,7 @@ int pm_suspend_disk(void) power_down(); else { swsusp_free(); - goto Finish; + goto Thaw; } } else { pr_debug("PM: Image restored successfully.\n"); @@ -199,10 +204,12 @@ int pm_suspend_disk(void) platform_finish(); device_resume(); resume_console(); - Finish: - free_basic_memory_bitmaps(); Thaw: unprepare_processes(); + Finish: + free_basic_memory_bitmaps(); + Exit: + atomic_inc(&snapshot_device_available); return error; } @@ -250,9 +257,15 @@ static int software_resume(void) if (error) goto Unlock; + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + error = create_basic_memory_bitmaps(); if (error) - goto Unlock; + goto Finish; pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); @@ -290,6 +303,8 @@ static int software_resume(void) unprepare_processes(); Done: free_basic_memory_bitmaps(); + Finish: + atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); diff --git a/kernel/power/power.h b/kernel/power/power.h index 1f8052bda0f7..a64d3f22de97 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -140,6 +140,9 @@ struct resume_swap_area { #define PMOPS_ENTER 2 #define PMOPS_FINISH 3 +/* If unset, the snapshot device cannot be open. 
*/ +extern atomic_t snapshot_device_available; + /** * The bitmap is used for tracing allocated swap pages * diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f66e4411795b..128da11f01c2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -723,19 +723,19 @@ int create_basic_memory_bitmaps(void) BUG_ON(forbidden_pages_map || free_pages_map); - bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) return -ENOMEM; - error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY); + error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); if (error) goto Free_first_object; - bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC); + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm2) goto Free_first_bitmap; - error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY); + error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); if (error) goto Free_second_object; diff --git a/kernel/power/user.c b/kernel/power/user.c index bd1771f7a64e..72dbfd01408e 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -40,21 +40,21 @@ static struct snapshot_data { char platform_suspend; } snapshot_state; -static atomic_t device_available = ATOMIC_INIT(1); +atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - if (!atomic_add_unless(&device_available, -1, 0)) + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) return -EBUSY; if ((filp->f_flags & O_ACCMODE) == O_RDWR) { - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return -ENOSYS; } if(create_basic_memory_bitmaps()) { - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return -ENOMEM; } nonseekable_open(inode, filp); @@ -92,7 +92,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) enable_nonboot_cpus(); mutex_unlock(&pm_mutex); } - 
atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return 0; } -- cgit v1.2.1 From d1d241cc2c5feec057c370aa71637380b1b945d5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:47 -0700 Subject: swsusp: use rbtree for tracking allocated swap Make swsusp use extents instead of a bitmap to trace swap pages allocated for saving the image (the tracking is only needed in case there's an error, so that the allocated swap pages can be released). This should allow us to reduce the memory usage, practically always, and improve performance. Signed-off-by: Rafael J. Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 27 ++-------- kernel/power/swap.c | 18 ++----- kernel/power/swsusp.c | 137 +++++++++++++++++++++++++++----------------------- kernel/power/user.c | 22 ++------ 4 files changed, 86 insertions(+), 118 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/power.h b/kernel/power/power.h index a64d3f22de97..a3e47cbdaf31 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -143,30 +143,9 @@ struct resume_swap_area { /* If unset, the snapshot device cannot be open. */ extern atomic_t snapshot_device_available; -/** - * The bitmap is used for tracing allocated swap pages - * - * The entire bitmap consists of a number of bitmap_page - * structures linked with the help of the .next member. - * Thus each page can be allocated individually, so we only - * need to make 0-order memory allocations to create - * the bitmap. 
- */ - -#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *)) -#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long)) -#define BITS_PER_CHUNK (sizeof(long) * 8) -#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK) - -struct bitmap_page { - unsigned long chunks[BITMAP_PAGE_CHUNKS]; - struct bitmap_page *next; -}; - -extern void free_bitmap(struct bitmap_page *bitmap); -extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); -extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); -extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); +extern sector_t alloc_swapdev_block(int swap); +extern void free_all_swap_pages(int swap); +extern int swsusp_swap_in_use(void); extern int swsusp_check(void); extern int swsusp_shrink_memory(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b18c155cbb60..e83ed9945a80 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -243,7 +243,6 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; sector_t cur_swap; - struct bitmap_page *bitmap; unsigned int k; }; @@ -252,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle) if (handle->cur) free_page((unsigned long)handle->cur); handle->cur = NULL; - if (handle->bitmap) - free_bitmap(handle->bitmap); - handle->bitmap = NULL; } static int get_swap_writer(struct swap_map_handle *handle) @@ -262,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle) handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); if (!handle->cur) return -ENOMEM; - handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0)); - if (!handle->bitmap) { - release_swap_writer(handle); - return -ENOMEM; - } - handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); + handle->cur_swap = alloc_swapdev_block(root_swap); if (!handle->cur_swap) { release_swap_writer(handle); return -ENOSPC; @@ -284,7 +275,7 @@ static int swap_write_page(struct swap_map_handle 
*handle, void *buf, if (!handle->cur) return -EINVAL; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); error = write_page(buf, offset, bio_chain); if (error) return error; @@ -293,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, error = wait_on_bio_chain(bio_chain); if (error) goto out; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; @@ -430,7 +421,8 @@ int swsusp_write(void) } } if (error) - free_all_swap_pages(root_swap, handle.bitmap); + free_all_swap_pages(root_swap); + release_swap_writer(&handle); out: swsusp_close(); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 175370824f37..1109023d8358 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "power.h" @@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; } /** * The following functions are used for tracing the allocated * swap pages, so that they can be freed in case of an error. 
- * - * The functions operate on a linked bitmap structure defined - * in power.h */ -void free_bitmap(struct bitmap_page *bitmap) -{ - struct bitmap_page *bp; +struct swsusp_extent { + struct rb_node node; + unsigned long start; + unsigned long end; +}; - while (bitmap) { - bp = bitmap->next; - free_page((unsigned long)bitmap); - bitmap = bp; - } -} +static struct rb_root swsusp_extents = RB_ROOT; -struct bitmap_page *alloc_bitmap(unsigned int nr_bits) +static int swsusp_extents_insert(unsigned long swap_offset) { - struct bitmap_page *bitmap, *bp; - unsigned int n; - - if (!nr_bits) - return NULL; - - bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bitmap; - for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { - bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bp->next; - if (!bp) { - free_bitmap(bitmap); - return NULL; + struct rb_node **new = &(swsusp_extents.rb_node); + struct rb_node *parent = NULL; + struct swsusp_extent *ext; + + /* Figure out where to put the new node */ + while (*new) { + ext = container_of(*new, struct swsusp_extent, node); + parent = *new; + if (swap_offset < ext->start) { + /* Try to merge */ + if (swap_offset == ext->start - 1) { + ext->start--; + return 0; + } + new = &((*new)->rb_left); + } else if (swap_offset > ext->end) { + /* Try to merge */ + if (swap_offset == ext->end + 1) { + ext->end++; + return 0; + } + new = &((*new)->rb_right); + } else { + /* It already is in the tree */ + return -EINVAL; } } - return bitmap; -} - -static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) -{ - unsigned int n; - - n = BITMAP_PAGE_BITS; - while (bitmap && n <= bit) { - n += BITMAP_PAGE_BITS; - bitmap = bitmap->next; - } - if (!bitmap) - return -EINVAL; - n -= BITMAP_PAGE_BITS; - bit -= n; - n = 0; - while (bit >= BITS_PER_CHUNK) { - bit -= BITS_PER_CHUNK; - n++; - } - bitmap->chunks[n] |= (1UL << bit); + /* Add the new node and rebalance the tree. 
*/ + ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); + if (!ext) + return -ENOMEM; + + ext->start = swap_offset; + ext->end = swap_offset; + rb_link_node(&ext->node, parent, new); + rb_insert_color(&ext->node, &swsusp_extents); return 0; } -sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) +/** + * alloc_swapdev_block - allocate a swap page and register that it has + * been allocated, so that it can be freed in case of an error. + */ + +sector_t alloc_swapdev_block(int swap) { unsigned long offset; offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { - if (bitmap_set(bitmap, offset)) + if (swsusp_extents_insert(offset)) swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); @@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) return 0; } -void free_all_swap_pages(int swap, struct bitmap_page *bitmap) +/** + * free_all_swap_pages - free swap pages allocated for saving image data. + * It also frees the extents used to register which swap entries had been + * allocated. 
+ */ + +void free_all_swap_pages(int swap) { - unsigned int bit, n; - unsigned long test; - - bit = 0; - while (bitmap) { - for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) - for (test = 1UL; test; test <<= 1) { - if (bitmap->chunks[n] & test) - swap_free(swp_entry(swap, bit)); - bit++; - } - bitmap = bitmap->next; + struct rb_node *node; + + while ((node = swsusp_extents.rb_node)) { + struct swsusp_extent *ext; + unsigned long offset; + + ext = container_of(node, struct swsusp_extent, node); + rb_erase(node, &swsusp_extents); + for (offset = ext->start; offset <= ext->end; offset++) + swap_free(swp_entry(swap, offset)); + + kfree(ext); } } +int swsusp_swap_in_use(void) +{ + return (swsusp_extents.rb_node != NULL); +} + /** * swsusp_show_speed - print the time elapsed between two events represented by * @start and @stop diff --git a/kernel/power/user.c b/kernel/power/user.c index 72dbfd01408e..ad4e10208cde 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -33,7 +33,6 @@ static struct snapshot_data { struct snapshot_handle handle; int swap; - struct bitmap_page *bitmap; int mode; char frozen; char ready; @@ -69,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; } - data->bitmap = NULL; data->frozen = 0; data->ready = 0; data->platform_suspend = 0; @@ -84,8 +82,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) swsusp_free(); free_basic_memory_bitmaps(); data = filp->private_data; - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); + free_all_swap_pages(data->swap); if (data->frozen) { mutex_lock(&pm_mutex); thaw_processes(); @@ -300,14 +297,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - if (!data->bitmap) { - data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0)); - if (!data->bitmap) { - error = -ENOMEM; - break; - } - } - offset = alloc_swapdev_block(data->swap, data->bitmap); + offset = 
alloc_swapdev_block(data->swap); if (offset) { offset <<= PAGE_SHIFT; error = put_user(offset, (sector_t __user *)arg); @@ -321,13 +311,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); - data->bitmap = NULL; + free_all_swap_pages(data->swap); break; case SNAPSHOT_SET_SWAP_FILE: - if (!data->bitmap) { + if (!swsusp_swap_in_use()) { /* * User space encodes device types as two-byte values, * so we need to recode them @@ -426,7 +414,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_SET_SWAP_AREA: - if (data->bitmap) { + if (swsusp_swap_in_use()) { error = -EPERM; } else { struct resume_swap_area swap_area; -- cgit v1.2.1 From ab3bfca7abf3fd0fe41d26d839610a787aa7e587 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 May 2007 14:50:49 -0700 Subject: remove software_suspend() Remove software_suspend() and all its users since pm_suspend(PM_SUSPEND_DISK) should be equivalent and there's no point in having two interfaces for the same thing. The patch also changes the valid_state function to return 0 (false) for PM_SUSPEND_DISK when SOFTWARE_SUSPEND is not configured instead of accepting it and having the whole thing fail later. Signed-off-by: Johannes Berg Acked-by: "Rafael J. 
Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/main.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/main.c b/kernel/power/main.c index 5a779270cdbc..f6dda685e7e2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -184,17 +184,21 @@ static void suspend_finish(suspend_state_t state) static const char * const pm_states[PM_SUSPEND_MAX] = { [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", -#ifdef CONFIG_SOFTWARE_SUSPEND [PM_SUSPEND_DISK] = "disk", -#endif }; static inline int valid_state(suspend_state_t state) { /* Suspend-to-disk does not really need low-level support. - * It can work with reboot if needed. */ + * It can work with shutdown/reboot if needed. If it isn't + * configured, then it cannot be supported. + */ if (state == PM_SUSPEND_DISK) +#ifdef CONFIG_SOFTWARE_SUSPEND return 1; +#else + return 0; +#endif /* all other states need lowlevel support and need to be * valid to the lowlevel implementation, no valid callback @@ -244,17 +248,6 @@ static int enter_state(suspend_state_t state) return error; } -#ifdef CONFIG_SOFTWARE_SUSPEND -/* - * This is main interface to the outside world. It needs to be - * called from process context. - */ -int software_suspend(void) -{ - return enter_state(PM_SUSPEND_DISK); -} -#endif - /** * pm_suspend - Externally visible function for suspending system. -- cgit v1.2.1 From f0ced9b229cfbc76b5db9837b4b256b602d56610 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 May 2007 14:50:50 -0700 Subject: power management: change /sys/power/disk display Change /sys/power/disk to display all valid modes as well as the currently selected one in a fashion known from the LED subsystem. This changes userspace API, but it is apparently not used much (we asked some userspace developers) Signed-off-by: Johannes Berg Acked-by: "Rafael J. 
Wysocki" Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/disk.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e518379b667a..06331374d862 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -350,7 +350,34 @@ static const char * const pm_disk_modes[] = { static ssize_t disk_show(struct kset *kset, char *buf) { - return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); + int i; + char *start = buf; + + for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { + if (!pm_disk_modes[i]) + continue; + switch (i) { + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + break; + default: + if (pm_ops && pm_ops->enter && + (i == pm_ops->pm_disk_mode)) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == pm_disk_mode) + buf += sprintf(buf, "[%s]", pm_disk_modes[i]); + else + buf += sprintf(buf, "%s", pm_disk_modes[i]); + if (i+1 != PM_DISK_MAX) + buf += sprintf(buf, " "); + } + buf += sprintf(buf, "\n"); + return buf-start; } -- cgit v1.2.1 From a7ee2e5f5b4c9c72f4390c60ba7ea30306f47188 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 6 May 2007 14:50:50 -0700 Subject: kconfig: mention 'hibernation' not just swsusp Clarify that "software suspend" is what's called "hibernation" in most user interfaces, shrinking a terminology gap. (Examples include Gnome and MS-Windows.) Also provide a more succinct description of what it does, so you won't have to read the whole novel in Kconfig; and highlight just why the lack of BIOS requirements for swsusp is a big deal. Signed-off-by: David Brownell Acked-by: "Rafael J. 
Wysocki" Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/Kconfig | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 51a4dd0f1b74..877721708fa4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED are likely to be bus or driver specific. config SOFTWARE_SUSPEND - bool "Software Suspend" + bool "Software Suspend (Hibernation)" depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) ---help--- - Enable the suspend to disk (STD) functionality. + Enable the suspend to disk (STD) functionality, which is usually + called "hibernation" in user interfaces. STD checkpoints the + system and powers it off; and restores that checkpoint on reboot. You can suspend your machine with 'echo disk > /sys/power/state'. Alternatively, you can use the additional userland tools available from <http://suspend.sf.net>. In principle it does not require ACPI or APM, although for example - ACPI will be used if available. + ACPI will be used for the final steps when it is available. One + of the reasons to use software suspend is that the firmware hooks + for suspend states like suspend-to-RAM (STR) often don't work very + well with Linux. It creates an image which is saved in your active swap. Upon the next boot, pass the 'resume=/dev/swappartition' argument to the kernel to -- cgit v1.2.1 From 9b95e43763cfdfebc1318d27e55712e7b6bfe098 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:51 -0700 Subject: swsusp: fix snapshot_release Remove the leftover enable_nonboot_cpus() from snapshot_release(). Signed-off-by: Rafael J. 
Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/user.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/user.c b/kernel/power/user.c index ad4e10208cde..040560d9c312 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -86,7 +86,6 @@ static int snapshot_release(struct inode *inode, struct file *filp) if (data->frozen) { mutex_lock(&pm_mutex); thaw_processes(); - enable_nonboot_cpus(); mutex_unlock(&pm_mutex); } atomic_inc(&snapshot_device_available); -- cgit v1.2.1 From 56f99bcb52d64d70078b41cc176dd8b6f5763108 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 6 May 2007 14:50:52 -0700 Subject: swsusp: free more memory Move the definition of PAGES_FOR_IO to kernel/power/power.h and introduce SPARE_PAGES representing the number of pages that should be freed by the swsusp's memory shrinker in addition to PAGES_FOR_IO so that device drivers can allocate some memory (up to 1 MB total) in their .suspend() routines without causing the suspend to fail. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 12 +++++++++++- kernel/power/swsusp.c | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/power.h b/kernel/power/power.h index a3e47cbdaf31..34b43542785a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -14,8 +14,18 @@ struct swsusp_info { #ifdef CONFIG_SOFTWARE_SUSPEND -extern int pm_suspend_disk(void); +/* + * Keep some memory free so that I/O operations can succeed without paging + * [Might this be more than 4 MB?] + */ +#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) +/* + * Keep 1 MB of memory free so that device drivers can allocate some pages in + * their .suspend() routines without breaking the suspend to disk. 
+ */ +#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) +extern int pm_suspend_disk(void); #else static inline int pm_suspend_disk(void) { diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 1109023d8358..5da304c8f1f6 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -233,7 +233,7 @@ int swsusp_shrink_memory(void) long size, highmem_size; highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO; + size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; tmp = size; size += highmem_size; for_each_zone (zone) -- cgit v1.2.1