From 13ceef099edd2b70c5a6f3a9ef5d6d97cda2e096 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 14 Jul 2010 07:56:33 +0200
Subject: jbd2/ocfs2: Fix block checksumming when a buffer is used in several
 transactions

OCFS2 uses t_commit trigger to compute and store checksum of the just
committed blocks. When a buffer has b_frozen_data, checksum is computed
for it instead of b_data but this can result in an old checksum being
written to the filesystem in the following scenario:

1) transaction1 is opened
2) handle1 is opened
3) journal_access(handle1, bh)
    - This sets jh->b_transaction to transaction1
4) modify(bh)
5) journal_dirty(handle1, bh)
6) handle1 is closed
7) start committing transaction1, opening transaction2
8) handle2 is opened
9) journal_access(handle2, bh)
    - This copies off b_frozen_data to make it safe for transaction1 to commit.
      jh->b_next_transaction is set to transaction2.
10) jbd2_journal_write_metadata() checksums b_frozen_data
11) the journal correctly writes b_frozen_data to the disk journal
12) handle2 is closed
    - There was no dirty call for the bh on handle2, so it is never queued for
      any more journal operation
13) Checkpointing finally happens, and it just spools the bh via normal buffer
writeback.  This will write b_data, which was never triggered on and thus
contains a wrong (old) checksum.

This patch fixes the problem by calling the trigger at the moment data is
frozen for journal commit - i.e., either when b_frozen_data is created by
do_get_write_access or just before we write a buffer to the log if
b_frozen_data does not exist. We also rename the trigger to t_frozen as
that better describes when it is called.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 include/linux/jbd2.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a4d2e9f7088a..adf832dec3f3 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 struct jbd2_buffer_trigger_type {
 	/*
-	 * Fired just before a buffer is written to the journal.
-	 * mapped_data is a mapped buffer that is the frozen data for
-	 * commit.
+	 * Fired a the moment data to write to the journal are known to be
+	 * stable - so either at the moment b_frozen_data is created or just
+	 * before a buffer is written to the journal.  mapped_data is a mapped
+	 * buffer that is the frozen data for commit.
 	 */
-	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+	void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
 			 struct buffer_head *bh, void *mapped_data,
 			 size_t size);
 
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
 			struct buffer_head *bh);
 };
 
-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
 				       void *mapped_data,
 				       struct jbd2_buffer_trigger_type *triggers);
 extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
-- 
cgit v1.2.1


From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 15 Jul 2010 09:41:42 -0600
Subject: PCI: fall back to original BIOS BAR addresses

If we fail to assign resources to a PCI BAR, this patch makes us try the
original address from BIOS rather than leaving it disabled.

Linux tries to make sure all PCI device BARs are inside the upstream
PCI host bridge or P2P bridge apertures, reassigning BARs if necessary.
Windows does similar reassignment.

Before this patch, if we could not move a BAR into an aperture, we left
the resource unassigned, i.e., at address zero.  Windows leaves such BARs
at the original BIOS addresses, and this patch makes Linux do the same.

This is a bit ugly because we disable the resource long before we try to
reassign it, so we have to keep track of the BIOS BAR address somewhere.
For lack of a better place, I put it in the struct pci_dev.

I think it would be cleaner to attempt the assignment immediately when the
claim fails, so we could easily remember the original address.  But we
currently claim motherboard resources in the middle, after attempting to
claim PCI resources and before assigning new PCI resources, and changing
that is a fairly big job.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263

Reported-by: Andrew <nitr0@seti.kr.ua>
Tested-by: Andrew <nitr0@seti.kr.ua>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cb00845f150..f26fda76b87f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -288,6 +288,7 @@ struct pci_dev {
 	 */
 	unsigned int	irq;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+	resource_size_t	fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
 
 	/* These fields are used by common fixups */
 	unsigned int	transparent:1;	/* Transparent PCI bridge */
-- 
cgit v1.2.1


From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Jul 2010 14:56:17 +1000
Subject: mm: add context argument to shrinker callback

The current shrinker implementation requires the registered callback
to have global state to work from. This makes it difficult to shrink
caches that are not global (e.g. per-filesystem caches). Pass the shrinker
structure to the callback so that users can embed the shrinker structure
in the context the shrinker needs to operate on and get back to it in the
callback via container_of().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b969efb03787..a2b48041b910 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-	int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+	int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
 	int seeks;	/* seeks to recreate an obj */
 
 	/* These are for internal use */
-- 
cgit v1.2.1


From 772a2f9b488f4d27c314da5eeabde750b9ead41b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 15 Jul 2010 10:39:47 +0200
Subject: fb: handle allocation failure in alloc_apertures()

If the kzalloc() fails we should return NULL.  All the places that call
alloc_apertures() check for this already.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: James Simmons <jsimmons@infradead.org>
Acked-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/fb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 8e5a9dfb76bf..e7445df44d6c 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -873,6 +873,8 @@ struct fb_info {
 static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
 	struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
 			+ max_num * sizeof(struct aperture), GFP_KERNEL);
+	if (!a)
+		return NULL;
 	a->count = max_num;
 	return a;
 }
-- 
cgit v1.2.1


From 844b9a8707f1fcf0482e0c52f44a555e799ccda6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Jul 2010 13:24:34 -0700
Subject: vfs: fix RCU-lockdep false positive due to /proc

If a single-threaded process does a file-descriptor operation, and some
other process accesses that same file descriptor via /proc, the current
rcu_dereference_check_fdtable() can give a false-positive RCU-lockdep
splat due to the reference count being increased by the /proc access after
the reference-count check in fget_light() but before the check in
rcu_dereference_check_fdtable().

This commit prevents this false positive by checking for a single-threaded
process.  To avoid #include hell, this commit uses the wrapper for
thread_group_empty(current) defined by rcu_my_thread_group_empty()
provided in a separate commit.

Located-by: Miles Lane <miles.lane@gmail.com>
Located-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fdtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 013dc529e95f..d147461bc271 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -61,7 +61,8 @@ struct files_struct {
 	(rcu_dereference_check((fdtfd), \
 			       rcu_read_lock_held() || \
 			       lockdep_is_held(&(files)->file_lock) || \
-			       atomic_read(&(files)->count) == 1))
+			       atomic_read(&(files)->count) == 1 || \
+			       rcu_my_thread_group_empty()))
 
 #define files_fdtable(files) \
 		(rcu_dereference_check_fdtable((files), (files)->fdt))
-- 
cgit v1.2.1


From a6a1a095ec8ace2912fc280d371eee8ff5da5736 Mon Sep 17 00:00:00 2001
From: Doug Goldstein <cardoe@gentoo.org>
Date: Tue, 20 Jul 2010 15:22:25 -0700
Subject: include/linux/vgaarb.h: add missing part of include guard

vgaarb.h was missing the #define of the #ifndef at the top for the guard
to prevent multiple #include's from causing re-define errors

Signed-off-by: Doug Goldstein <cardoe@gentoo.org>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vgaarb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index c9a975976995..814f294d4cd0 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -29,6 +29,7 @@
  */
 
 #ifndef LINUX_VGA_H
+#define LINUX_VGA_H
 
 #include <asm/vga.h>
 
-- 
cgit v1.2.1


From edd63cb6b91024332d6983fc51058ac1ef0c081e Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 21 Jul 2010 19:27:07 -0500
Subject: sysrq,kdb: Use __handle_sysrq() for kdb's sysrq function

The kdb code should not toggle the sysrq state in case an end user
wants to try and resume the normal kernel execution.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/sysrq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 4496322e28dd..609e8ca5f534 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -45,6 +45,7 @@ struct sysrq_key_op {
  */
 
 void handle_sysrq(int key, struct tty_struct *tty);
+void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
 int register_sysrq_key(int key, struct sysrq_key_op *op);
 int unregister_sysrq_key(int key, struct sysrq_key_op *op);
 struct sysrq_key_op *__sysrq_get_key_op(int key);
-- 
cgit v1.2.1


From 8a35747a5d13b99e076b0222729e0caa48cb69b6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 21 Jul 2010 21:44:31 +0000
Subject: macvtap: Limit packet queue length

Mark Wagner reported OOM symptoms when sending UDP traffic over
a macvtap link to a kvm receiver.

This appears to be caused by the fact that macvtap packet queues
are unlimited in length.  This means that if the receiver can't
keep up with the rate of flow, then we will hit OOM. Of course
it gets worse if the OOM killer then decides to kill the receiver.

This patch imposes a cap on the packet queue length, in the same
way as the tuntap driver, using the device TX queue length.

Please note that macvtap currently has no way of giving congestion
notification, that means the software device TX queue cannot be
used and packets will always be dropped once the macvtap driver
queue fills up.

This shouldn't be a great problem for the scenario where macvtap
is used to feed a kvm receiver, as the traffic is most likely
external in origin so congestion notification can't be applied
anyway.

Of course, if anybody decides to complain about guest-to-guest
UDP packet loss down the track, then we may have to revisit this.

Incidentally, this patch also fixes a real memory leak when
macvtap_get_queue fails.

Chris Wright noticed that for this patch to work, we need a
non-zero TX queue length.  This patch includes his work to change
the default macvtap TX queue length to 500.

Reported-by: Mark Wagner <mwagner@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047aca795..1ffaeffeff74 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -67,6 +67,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 	}
 }
 
+extern void macvlan_common_setup(struct net_device *dev);
+
 extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 				  struct nlattr *tb[], struct nlattr *data[],
 				  int (*receive)(struct sk_buff *skb),
-- 
cgit v1.2.1


From 72ad5d77fb981963edae15eee8196c80238f5ed0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 23 Jul 2010 22:59:09 +0200
Subject: ACPI / Sleep: Allow the NVS saving to be skipped during suspend to
 RAM

Commit 2a6b69765ad794389f2fc3e14a0afa1a995221c2
(ACPI: Store NVS state even when entering suspend to RAM) caused the
ACPI suspend code save the NVS area during suspend and restore it
during resume unconditionally, although it is known that some systems
need to use acpi_sleep=s4_nonvs for hibernation to work.  To allow
the affected systems to avoid saving and restoring the NVS area
during suspend to RAM and resume, introduce kernel command line
option acpi_sleep=nonvs and make acpi_sleep=s4_nonvs work as its
alias temporarily (add acpi_sleep=s4_nonvs to the feature removal
file).

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16396 .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-and-tested-by: tomas m <tmezzadra@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 224a38c960d4..ccf94dc5acdf 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -253,7 +253,7 @@ int acpi_resources_are_enforced(void);
 #ifdef CONFIG_PM_SLEEP
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
-void __init acpi_s4_no_nvs(void);
+void __init acpi_nvs_nosave(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.1