From 13ceef099edd2b70c5a6f3a9ef5d6d97cda2e096 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Jul 2010 07:56:33 +0200 Subject: jbd2/ocfs2: Fix block checksumming when a buffer is used in several transactions OCFS2 uses t_commit trigger to compute and store checksum of the just committed blocks. When a buffer has b_frozen_data, checksum is computed for it instead of b_data but this can result in an old checksum being written to the filesystem in the following scenario: 1) transaction1 is opened 2) handle1 is opened 3) journal_access(handle1, bh) - This sets jh->b_transaction to transaction1 4) modify(bh) 5) journal_dirty(handle1, bh) 6) handle1 is closed 7) start committing transaction1, opening transaction2 8) handle2 is opened 9) journal_access(handle2, bh) - This copies off b_frozen_data to make it safe for transaction1 to commit. jh->b_next_transaction is set to transaction2. 10) jbd2_journal_write_metadata() checksums b_frozen_data 11) the journal correctly writes b_frozen_data to the disk journal 12) handle2 is closed - There was no dirty call for the bh on handle2, so it is never queued for any more journal operation 13) Checkpointing finally happens, and it just spools the bh via normal buffer writeback. This will write b_data, which was never triggered on and thus contains a wrong (old) checksum. This patch fixes the problem by calling the trigger at the moment data is frozen for journal commit - i.e., either when b_frozen_data is created by do_get_write_access or just before we write a buffer to the log if b_frozen_data does not exist. We also rename the trigger to t_frozen as that better describes when it is called. Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- include/linux/jbd2.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a4d2e9f7088a..adf832dec3f3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); struct jbd2_buffer_trigger_type { /* - * Fired just before a buffer is written to the journal. - * mapped_data is a mapped buffer that is the frozen data for - * commit. + * Fired a the moment data to write to the journal are known to be + * stable - so either at the moment b_frozen_data is created or just + * before a buffer is written to the journal. mapped_data is a mapped + * buffer that is the frozen data for commit. */ - void (*t_commit)(struct jbd2_buffer_trigger_type *type, + void (*t_frozen)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh, void *mapped_data, size_t size); @@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type { struct buffer_head *bh); }; -extern void jbd2_buffer_commit_trigger(struct journal_head *jh, +extern void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, struct jbd2_buffer_trigger_type *triggers); extern void jbd2_buffer_abort_trigger(struct journal_head *jh, -- cgit v1.2.1 From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 15 Jul 2010 09:41:42 -0600 Subject: PCI: fall back to original BIOS BAR addresses If we fail to assign resources to a PCI BAR, this patch makes us try the original address from BIOS rather than leaving it disabled. Linux tries to make sure all PCI device BARs are inside the upstream PCI host bridge or P2P bridge apertures, reassigning BARs if necessary. Windows does similar reassignment. Before this patch, if we could not move a BAR into an aperture, we left the resource unassigned, i.e., at address zero. Windows leaves such BARs at the original BIOS addresses, and this patch makes Linux do the same. This is a bit ugly because we disable the resource long before we try to reassign it, so we have to keep track of the BIOS BAR address somewhere. For lack of a better place, I put it in the struct pci_dev. I think it would be cleaner to attempt the assignment immediately when the claim fails, so we could easily remember the original address. But we currently claim motherboard resources in the middle, after attempting to claim PCI resources and before assigning new PCI resources, and changing that is a fairly big job. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263 Reported-by: Andrew Tested-by: Andrew Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cb00845f150..f26fda76b87f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -288,6 +288,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */ /* These fields are used by common fixups */ unsigned int transparent:1; /* Transparent PCI bridge */ -- cgit v1.2.1 From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 19 Jul 2010 14:56:17 +1000 Subject: mm: add context argument to shrinker callback The current shrinker implementation requires the registered callback to have global state to work from. This makes it difficult to shrink caches that are not global (e.g. per-filesystem caches). Pass the shrinker structure to the callback so that users can embed the shrinker structure in the context the shrinker needs to operate on and get back to it in the callback via container_of(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b969efb03787..a2b48041b910 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ -- cgit v1.2.1 From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Jul 2010 10:39:47 +0200 Subject: fb: handle allocation failure in alloc_apertures() If the kzalloc() fails we should return NULL. All the places that call alloc_apertures() check for this already. Signed-off-by: Dan Carpenter Acked-by: James Simmons Acked-by: Marcin Slusarz Signed-off-by: Dave Airlie --- include/linux/fb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8e5a9dfb76bf..e7445df44d6c 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -873,6 +873,8 @@ struct fb_info { static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct) + max_num * sizeof(struct aperture), GFP_KERNEL); + if (!a) + return NULL; a->count = max_num; return a; } -- cgit v1.2.1 From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Jul 2010 13:24:34 -0700 Subject: vfs: fix RCU-lockdep false positive due to /proc If a single-threaded process does a file-descriptor operation, and some other process accesses that same file descriptor via /proc, the current rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep splat due to the reference count being increased by the /proc access after the reference-count check in fget_light() but before the check in rcu_dereference_check_fdtable(). This commit prevents this false positive by checking for a single-threaded process. To avoid #include hell, this commit uses the wrapper for thread_group_empty(current) defined by rcu_my_thread_group_empty() provided in a separate commit. Located-by: Miles Lane Located-by: Eric Dumazet Signed-off-by: Paul E. McKenney Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fdtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 013dc529e95f..d147461bc271 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -61,7 +61,8 @@ struct files_struct { (rcu_dereference_check((fdtfd), \ rcu_read_lock_held() || \ lockdep_is_held(&(files)->file_lock) || \ - atomic_read(&(files)->count) == 1)) + atomic_read(&(files)->count) == 1 || \ + rcu_my_thread_group_empty())) #define files_fdtable(files) \ (rcu_dereference_check_fdtable((files), (files)->fdt)) -- cgit v1.2.1 From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Tue, 20 Jul 2010 15:22:25 -0700 Subject: include/linux/vgaarb.h: add missing part of include guard vgaarb.h was missing the #define of the #ifndef at the top for the guard to prevent multiple #include's from causing re-define errors Signed-off-by: Doug Goldstein Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index c9a975976995..814f294d4cd0 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -29,6 +29,7 @@ */ #ifndef LINUX_VGA_H +#define LINUX_VGA_H #include -- cgit v1.2.1 From edd63cb6b91024332d6983fc51058ac1ef0c081e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 21 Jul 2010 19:27:07 -0500 Subject: sysrq,kdb: Use __handle_sysrq() for kdb's sysrq function The kdb code should not toggle the sysrq state in case an end user wants to try and resume the normal kernel execution. Signed-off-by: Jason Wessel Acked-by: Dmitry Torokhov --- include/linux/sysrq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4496322e28dd..609e8ca5f534 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -45,6 +45,7 @@ struct sysrq_key_op { */ void handle_sysrq(int key, struct tty_struct *tty); +void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); -- cgit v1.2.1 From 8a35747a5d13b99e076b0222729e0caa48cb69b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 21 Jul 2010 21:44:31 +0000 Subject: macvtap: Limit packet queue length Mark Wagner reported OOM symptoms when sending UDP traffic over a macvtap link to a kvm receiver. This appears to be caused by the fact that macvtap packet queues are unlimited in length. This means that if the receiver can't keep up with the rate of flow, then we will hit OOM. Of course it gets worse if the OOM killer then decides to kill the receiver. This patch imposes a cap on the packet queue length, in the same way as the tuntap driver, using the device TX queue length. Please note that macvtap currently has no way of giving congestion notification, that means the software device TX queue cannot be used and packets will always be dropped once the macvtap driver queue fills up. This shouldn't be a great problem for the scenario where macvtap is used to feed a kvm receiver, as the traffic is most likely external in origin so congestion notification can't be applied anyway. Of course, if anybody decides to complain about guest-to-guest UDP packet loss down the track, then we may have to revisit this. Incidentally, this patch also fixes a real memory leak when macvtap_get_queue fails. Chris Wright noticed that for this patch to work, we need a non-zero TX queue length. This patch includes his work to change the default macvtap TX queue length to 500. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Acked-by: Chris Wright Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9ea047aca795..1ffaeffeff74 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -67,6 +67,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, } } +extern void macvlan_common_setup(struct net_device *dev); + extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], int (*receive)(struct sk_buff *skb), -- cgit v1.2.1 From 72ad5d77fb981963edae15eee8196c80238f5ed0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Jul 2010 22:59:09 +0200 Subject: ACPI / Sleep: Allow the NVS saving to be skipped during suspend to RAM Commit 2a6b69765ad794389f2fc3e14a0afa1a995221c2 (ACPI: Store NVS state even when entering suspend to RAM) caused the ACPI suspend code save the NVS area during suspend and restore it during resume unconditionally, although it is known that some systems need to use acpi_sleep=s4_nonvs for hibernation to work. To allow the affected systems to avoid saving and restoring the NVS area during suspend to RAM and resume, introduce kernel command line option acpi_sleep=nonvs and make acpi_sleep=s4_nonvs work as its alias temporarily (add acpi_sleep=s4_nonvs to the feature removal file). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16396 . Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: tomas m Signed-off-by: Len Brown --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 224a38c960d4..ccf94dc5acdf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -253,7 +253,7 @@ int acpi_resources_are_enforced(void); #ifdef CONFIG_PM_SLEEP void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); -void __init acpi_s4_no_nvs(void); +void __init acpi_nvs_nosave(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.1