From a1a61a435b3cc157830b7d42b175151ae5eabdd3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 14 May 2008 23:24:09 -0700
Subject: atm: Cleanup atm_tcp.h and atm.h for userspace.

The atm_tcp.h uses types from linux/atm.h, but does not include it.
It should also use the standard __u## types from linux/types.h rather
than the uint##_t types since the former can be found with the kernel
already.

Same goes for linux/atm.h.  The linux/socket.h include there also gets
dropped as atm.h does not actually use anything from socket.h.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atm.h     |  7 ++-----
 include/linux/atm_tcp.h | 12 +++++-------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atm.h b/include/linux/atm.h
index 60136684e0af..c791ddd96939 100644
--- a/include/linux/atm.h
+++ b/include/linux/atm.h
@@ -16,14 +16,11 @@
  * documentation. Do not change them.
  */
 
-#ifdef __KERNEL__
-#include <linux/socket.h>
-#include <linux/types.h>
-#endif
 #include <linux/compiler.h>
 #include <linux/atmapi.h>
 #include <linux/atmsap.h>
 #include <linux/atmioc.h>
+#include <linux/types.h>
 
 
 /* general ATM constants */
@@ -212,7 +209,7 @@ struct sockaddr_atmsvc {
         char		pub[ATM_E164_LEN+1]; /* public address (E.164) */
     					/* unused addresses must be bzero'ed */
 	char		lij_type;	/* role in LIJ call; one of ATM_LIJ* */
-	uint32_t	lij_id;		/* LIJ call identifier */
+	__u32	lij_id;		/* LIJ call identifier */
     } sas_addr __ATM_API_ALIGN;		/* SVC address */
 };
 
diff --git a/include/linux/atm_tcp.h b/include/linux/atm_tcp.h
index 18787f9b2f19..375638f8554b 100644
--- a/include/linux/atm_tcp.h
+++ b/include/linux/atm_tcp.h
@@ -8,11 +8,9 @@
 #define LINUX_ATM_TCP_H
 
 #include <linux/atmapi.h>
-
-#ifdef __KERNEL__
-#include <linux/types.h>
-#endif
+#include <linux/atm.h>
 #include <linux/atmioc.h>
+#include <linux/types.h>
 
 
 /*
@@ -20,9 +18,9 @@
  */
 
 struct atmtcp_hdr {
-	uint16_t	vpi;
-	uint16_t	vci;
-	uint32_t	length;		/* ... of data part */
+	__u16	vpi;
+	__u16	vci;
+	__u32	length;		/* ... of data part */
 };
 
 /*
-- 
cgit v1.2.3


From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 May 2008 18:19:16 -0400
Subject: [PATCH] take init_files to fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/init_task.c     |  1 -
 arch/arm/kernel/init_task.c       |  1 -
 arch/avr32/kernel/init_task.c     |  1 -
 arch/blackfin/kernel/init_task.c  |  1 -
 arch/cris/kernel/process.c        |  1 -
 arch/frv/kernel/init_task.c       |  1 -
 arch/h8300/kernel/init_task.c     |  1 -
 arch/ia64/kernel/init_task.c      |  1 -
 arch/m32r/kernel/init_task.c      |  1 -
 arch/m68k/kernel/process.c        |  1 -
 arch/m68knommu/kernel/init_task.c |  1 -
 arch/mips/kernel/init_task.c      |  1 -
 arch/mn10300/kernel/init_task.c   |  1 -
 arch/parisc/kernel/init_task.c    |  1 -
 arch/powerpc/kernel/init_task.c   |  1 -
 arch/s390/kernel/init_task.c      |  1 -
 arch/sh/kernel/init_task.c        |  1 -
 arch/sparc/kernel/init_task.c     |  1 -
 arch/sparc64/kernel/init_task.c   |  1 -
 arch/um/kernel/init_task.c        |  1 -
 arch/v850/kernel/init_task.c      |  1 -
 arch/x86/kernel/init_task.c       |  1 -
 arch/xtensa/kernel/init_task.c    |  1 -
 fs/file.c                         | 13 +++++++++++++
 include/linux/init_task.h         | 23 +----------------------
 25 files changed, 14 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 835d09a7b332..1f762189fa64 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -9,7 +9,6 @@
 
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index bd4ef53bc6b9..8b8c9d38a761 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -13,7 +13,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index effcacf9d1a2..44058469c6ec 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -14,7 +14,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index c640154030e2..6bdba7b21109 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -34,7 +34,6 @@
 #include <linux/fs.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index ef2db8fd102a..5933656db5a2 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -38,7 +38,6 @@
  */
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index 22993932b3fc..e2198815b630 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -11,7 +11,6 @@
 
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 19272c2ac56a..93a4899e46c2 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -13,7 +13,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index bc8efcad28b8..9d7e1c66faf4 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -18,7 +18,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 9e508fd9d970..0d658dbb6766 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 5de4e4ed76ab..7888cdf91f5d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -41,7 +41,6 @@
  * setup.
  */
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 3897043a126a..344c01aede08 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -13,7 +13,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index aeda7f58391b..d72487ad7c15 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index 39fe6882dd1d..af16f6e5c918 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -19,7 +19,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index 26198a074d67..f5941c086551 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -35,7 +35,6 @@
 #include <asm/pgalloc.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 941043ae040f..4c85b8d56478 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/uaccess.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index d494161b05b4..7ad003969251 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -17,7 +17,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index f9bcc606127e..b151a25cb14d 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct pt_regs fake_swapper_regs;
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index d9d4f96360c7..8e64ebc445ef 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -9,7 +9,6 @@
 #include <asm/uaccess.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sparc64/kernel/init_task.c b/arch/sparc64/kernel/init_task.c
index 90007cf88bac..d2b312381c19 100644
--- a/arch/sparc64/kernel/init_task.c
+++ b/arch/sparc64/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/processor.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index dcfceca95052..910eda8fca18 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 EXPORT_SYMBOL(init_mm);
diff --git a/arch/v850/kernel/init_task.c b/arch/v850/kernel/init_task.c
index ed2f93cf7c66..44b274dff33f 100644
--- a/arch/v850/kernel/init_task.c
+++ b/arch/v850/kernel/init_task.c
@@ -21,7 +21,6 @@
 #include <asm/pgtable.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS (init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM (init_mm);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3d01e47777db..a4f93b4120c1 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -11,7 +11,6 @@
 #include <asm/desc.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 021b4f46ff94..3df469dbe814 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -22,7 +22,6 @@
 #include <asm/uaccess.h>
 
 static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/fs/file.c b/fs/file.c
index 4c6f0ea12c41..754cd05b06af 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -275,3 +275,16 @@ void __init files_defer_init(void)
 	for_each_possible_cpu(i)
 		fdtable_defer_list_init(i);
 }
+
+struct files_struct init_files = {
+	.count		= ATOMIC_INIT(1),
+	.fdt		= &init_files.fdtab,
+	.fdtab		= {
+		.max_fds	= NR_OPEN_DEFAULT,
+		.fd		= &init_files.fd_array[0],
+		.close_on_exec	= (fd_set *)&init_files.close_on_exec_init,
+		.open_fds	= (fd_set *)&init_files.open_fds_init,
+		.rcu		= RCU_HEAD_INIT,
+	},
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+};
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index b24c2875aa05..9927a88674a3 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX__INIT_TASK_H
 #define _LINUX__INIT_TASK_H
 
-#include <linux/fdtable.h>
 #include <linux/rcupdate.h>
 #include <linux/irqflags.h>
 #include <linux/utsname.h>
@@ -12,27 +11,7 @@
 #include <linux/securebits.h>
 #include <net/net_namespace.h>
 
-#define INIT_FDTABLE \
-{							\
-	.max_fds	= NR_OPEN_DEFAULT, 		\
-	.fd		= &init_files.fd_array[0], 	\
-	.close_on_exec	= (fd_set *)&init_files.close_on_exec_init, \
-	.open_fds	= (fd_set *)&init_files.open_fds_init, 	\
-	.rcu		= RCU_HEAD_INIT, 		\
-	.next		= NULL,		 		\
-}
-
-#define INIT_FILES \
-{ 							\
-	.count		= ATOMIC_INIT(1), 		\
-	.fdt		= &init_files.fdtab, 		\
-	.fdtab		= INIT_FDTABLE,			\
-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock), \
-	.next_fd	= 0, 				\
-	.close_on_exec_init = { { 0, } }, 		\
-	.open_fds_init	= { { 0, } }, 			\
-	.fd_array	= { NULL, } 			\
-}
+extern struct files_struct init_files;
 
 #define INIT_KIOCTX(name, which_mm) \
 {							\
-- 
cgit v1.2.3


From 02afc6267f6d55d47aba9fcafdbd1b7230d2294a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 May 2008 19:42:56 -0400
Subject: [PATCH] dup_fd() fixes, part 1

Move the sucker to fs/file.c in preparation to the rest

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c               | 130 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fdtable.h |   1 +
 kernel/fork.c           | 130 ------------------------------------------------
 3 files changed, 131 insertions(+), 130 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file.c b/fs/file.c
index 754cd05b06af..7dbadaaf00f0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -261,6 +261,136 @@ int expand_files(struct files_struct *files, int nr)
 	return expand_fdtable(files, nr);
 }
 
+static int count_open_files(struct fdtable *fdt)
+{
+	int size = fdt->max_fds;
+	int i;
+
+	/* Find the last open fd */
+	for (i = size/(8*sizeof(long)); i > 0; ) {
+		if (fdt->open_fds->fds_bits[--i])
+			break;
+	}
+	i = (i+1) * 8 * sizeof(long);
+	return i;
+}
+
+static struct files_struct *alloc_files(void)
+{
+	struct files_struct *newf;
+	struct fdtable *fdt;
+
+	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
+	if (!newf)
+		goto out;
+
+	atomic_set(&newf->count, 1);
+
+	spin_lock_init(&newf->file_lock);
+	newf->next_fd = 0;
+	fdt = &newf->fdtab;
+	fdt->max_fds = NR_OPEN_DEFAULT;
+	fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+	fdt->open_fds = (fd_set *)&newf->open_fds_init;
+	fdt->fd = &newf->fd_array[0];
+	INIT_RCU_HEAD(&fdt->rcu);
+	fdt->next = NULL;
+	rcu_assign_pointer(newf->fdt, fdt);
+out:
+	return newf;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ * errorp will be valid only when the returned files_struct is NULL.
+ */
+struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+	struct files_struct *newf;
+	struct file **old_fds, **new_fds;
+	int open_files, size, i;
+	struct fdtable *old_fdt, *new_fdt;
+
+	*errorp = -ENOMEM;
+	newf = alloc_files();
+	if (!newf)
+		goto out;
+
+	spin_lock(&oldf->file_lock);
+	old_fdt = files_fdtable(oldf);
+	new_fdt = files_fdtable(newf);
+	open_files = count_open_files(old_fdt);
+
+	/*
+	 * Check whether we need to allocate a larger fd array and fd set.
+	 * Note: we're not a clone task, so the open count won't change.
+	 */
+	if (open_files > new_fdt->max_fds) {
+		new_fdt->max_fds = 0;
+		spin_unlock(&oldf->file_lock);
+		spin_lock(&newf->file_lock);
+		*errorp = expand_files(newf, open_files-1);
+		spin_unlock(&newf->file_lock);
+		if (*errorp < 0)
+			goto out_release;
+		new_fdt = files_fdtable(newf);
+		/*
+		 * Reacquire the oldf lock and a pointer to its fd table
+		 * who knows it may have a new bigger fd table. We need
+		 * the latest pointer.
+		 */
+		spin_lock(&oldf->file_lock);
+		old_fdt = files_fdtable(oldf);
+	}
+
+	old_fds = old_fdt->fd;
+	new_fds = new_fdt->fd;
+
+	memcpy(new_fdt->open_fds->fds_bits,
+		old_fdt->open_fds->fds_bits, open_files/8);
+	memcpy(new_fdt->close_on_exec->fds_bits,
+		old_fdt->close_on_exec->fds_bits, open_files/8);
+
+	for (i = open_files; i != 0; i--) {
+		struct file *f = *old_fds++;
+		if (f) {
+			get_file(f);
+		} else {
+			/*
+			 * The fd may be claimed in the fd bitmap but not yet
+			 * instantiated in the files array if a sibling thread
+			 * is partway through open().  So make sure that this
+			 * fd is available to the new process.
+			 */
+			FD_CLR(open_files - i, new_fdt->open_fds);
+		}
+		rcu_assign_pointer(*new_fds++, f);
+	}
+	spin_unlock(&oldf->file_lock);
+
+	/* compute the remainder to be cleared */
+	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+	/* This is long word aligned thus could use a optimized version */
+	memset(new_fds, 0, size);
+
+	if (new_fdt->max_fds > open_files) {
+		int left = (new_fdt->max_fds-open_files)/8;
+		int start = open_files / (8 * sizeof(unsigned long));
+
+		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+	}
+
+	return newf;
+
+out_release:
+	kmem_cache_free(files_cachep, newf);
+out:
+	return NULL;
+}
+
 static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index a118f3c0b240..4aab6f12cfab 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -93,6 +93,7 @@ struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct files_struct *);
 int unshare_files(struct files_struct **);
+struct files_struct *dup_fd(struct files_struct *, int *);
 
 extern struct kmem_cache *files_cachep;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 933e60ebccae..19908b26cf80 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -660,136 +660,6 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
-static int count_open_files(struct fdtable *fdt)
-{
-	int size = fdt->max_fds;
-	int i;
-
-	/* Find the last open fd */
-	for (i = size/(8*sizeof(long)); i > 0; ) {
-		if (fdt->open_fds->fds_bits[--i])
-			break;
-	}
-	i = (i+1) * 8 * sizeof(long);
-	return i;
-}
-
-static struct files_struct *alloc_files(void)
-{
-	struct files_struct *newf;
-	struct fdtable *fdt;
-
-	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
-	if (!newf)
-		goto out;
-
-	atomic_set(&newf->count, 1);
-
-	spin_lock_init(&newf->file_lock);
-	newf->next_fd = 0;
-	fdt = &newf->fdtab;
-	fdt->max_fds = NR_OPEN_DEFAULT;
-	fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
-	fdt->open_fds = (fd_set *)&newf->open_fds_init;
-	fdt->fd = &newf->fd_array[0];
-	INIT_RCU_HEAD(&fdt->rcu);
-	fdt->next = NULL;
-	rcu_assign_pointer(newf->fdt, fdt);
-out:
-	return newf;
-}
-
-/*
- * Allocate a new files structure and copy contents from the
- * passed in files structure.
- * errorp will be valid only when the returned files_struct is NULL.
- */
-static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
-{
-	struct files_struct *newf;
-	struct file **old_fds, **new_fds;
-	int open_files, size, i;
-	struct fdtable *old_fdt, *new_fdt;
-
-	*errorp = -ENOMEM;
-	newf = alloc_files();
-	if (!newf)
-		goto out;
-
-	spin_lock(&oldf->file_lock);
-	old_fdt = files_fdtable(oldf);
-	new_fdt = files_fdtable(newf);
-	open_files = count_open_files(old_fdt);
-
-	/*
-	 * Check whether we need to allocate a larger fd array and fd set.
-	 * Note: we're not a clone task, so the open count won't change.
-	 */
-	if (open_files > new_fdt->max_fds) {
-		new_fdt->max_fds = 0;
-		spin_unlock(&oldf->file_lock);
-		spin_lock(&newf->file_lock);
-		*errorp = expand_files(newf, open_files-1);
-		spin_unlock(&newf->file_lock);
-		if (*errorp < 0)
-			goto out_release;
-		new_fdt = files_fdtable(newf);
-		/*
-		 * Reacquire the oldf lock and a pointer to its fd table
-		 * who knows it may have a new bigger fd table. We need
-		 * the latest pointer.
-		 */
-		spin_lock(&oldf->file_lock);
-		old_fdt = files_fdtable(oldf);
-	}
-
-	old_fds = old_fdt->fd;
-	new_fds = new_fdt->fd;
-
-	memcpy(new_fdt->open_fds->fds_bits,
-		old_fdt->open_fds->fds_bits, open_files/8);
-	memcpy(new_fdt->close_on_exec->fds_bits,
-		old_fdt->close_on_exec->fds_bits, open_files/8);
-
-	for (i = open_files; i != 0; i--) {
-		struct file *f = *old_fds++;
-		if (f) {
-			get_file(f);
-		} else {
-			/*
-			 * The fd may be claimed in the fd bitmap but not yet
-			 * instantiated in the files array if a sibling thread
-			 * is partway through open().  So make sure that this
-			 * fd is available to the new process.
-			 */
-			FD_CLR(open_files - i, new_fdt->open_fds);
-		}
-		rcu_assign_pointer(*new_fds++, f);
-	}
-	spin_unlock(&oldf->file_lock);
-
-	/* compute the remainder to be cleared */
-	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
-
-	/* This is long word aligned thus could use a optimized version */
-	memset(new_fds, 0, size);
-
-	if (new_fdt->max_fds > open_files) {
-		int left = (new_fdt->max_fds-open_files)/8;
-		int start = open_files / (8 * sizeof(unsigned long));
-
-		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
-		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
-	}
-
-	return newf;
-
-out_release:
-	kmem_cache_free(files_cachep, newf);
-out:
-	return NULL;
-}
-
 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct files_struct *oldf, *newf;
-- 
cgit v1.2.3


From 08a6fac1c63233c87eec129938022f1a9a4d51f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 May 2008 16:38:25 -0400
Subject: [PATCH] get rid of leak in compat_execve()

Even though copy_compat_strings() doesn't cache the pages,
copy_strings_kernel() and stuff indirectly called by e.g.
->load_binary() is doing that, so we need to drop the
cache contents in the end.

[found by WANG Cong <wangcong@zeuux.org>]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c             |  4 ++--
 fs/exec.c               | 12 ++++++++----
 include/linux/binfmts.h |  1 +
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 332a869d2c53..ed43e17a5dc6 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1405,7 +1405,7 @@ int compat_do_execve(char * filename,
 		/* execve success */
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
-		kfree(bprm);
+		free_bprm(bprm);
 		return retval;
 	}
 
@@ -1424,7 +1424,7 @@ out_file:
 	}
 
 out_kfree:
-	kfree(bprm);
+	free_bprm(bprm);
 
 out_ret:
 	return retval;
diff --git a/fs/exec.c b/fs/exec.c
index 1f8a24aa1f8b..3c2ba7ce11d4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1251,6 +1251,12 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 
 EXPORT_SYMBOL(search_binary_handler);
 
+void free_bprm(struct linux_binprm *bprm)
+{
+	free_arg_pages(bprm);
+	kfree(bprm);
+}
+
 /*
  * sys_execve() executes a new program.
  */
@@ -1320,17 +1326,15 @@ int do_execve(char * filename,
 	retval = search_binary_handler(bprm,regs);
 	if (retval >= 0) {
 		/* execve success */
-		free_arg_pages(bprm);
 		security_bprm_free(bprm);
 		acct_update_integrals(current);
-		kfree(bprm);
+		free_bprm(bprm);
 		if (displaced)
 			put_files_struct(displaced);
 		return retval;
 	}
 
 out:
-	free_arg_pages(bprm);
 	if (bprm->security)
 		security_bprm_free(bprm);
 
@@ -1344,7 +1348,7 @@ out_file:
 		fput(bprm->file);
 	}
 out_kfree:
-	kfree(bprm);
+	free_bprm(bprm);
 
 out_files:
 	if (displaced)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index b512e48f6d8e..ee0ed48e8348 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -99,6 +99,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern void compute_creds(struct linux_binprm *binprm);
 extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
 extern int set_binfmt(struct linux_binfmt *new);
+extern void free_bprm(struct linux_binprm *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_BINFMTS_H */
-- 
cgit v1.2.3


From 10a38c33f46d128d11e299acba744bc325cde420 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 30 Apr 2008 17:32:17 -0500
Subject: svcrdma: Remove unused READ_DONE context flags bit

The RDMACTXT_F_READ_DONE bit is not longer used. Remove it.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 -
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 1 -
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index c11bbcc081f9..d0011f3db90c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -85,7 +85,6 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-#define RDMACTXT_F_READ_DONE	1
 #define RDMACTXT_F_LAST_CTXT	2
 
 struct svcxprt_rdma {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 5e03d95b25e2..80c6ee82c34b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -325,7 +325,6 @@ next_sge:
 		}
 		ctxt->next = NULL;
 		ctxt->direction = DMA_FROM_DEVICE;
-		clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
 		/* Prepare READ WR */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d9ed5f24c362..4a79dfda1465 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -353,7 +353,6 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
 				spin_lock_bh(&xprt->sc_read_complete_lock);
 				list_add_tail(&ctxt->dto_q,
 					      &xprt->sc_read_complete_q);
-- 
cgit v1.2.3


From 02e7452de74d308ca642f54f7e5ef801ced60a92 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 30 Apr 2008 19:50:56 -0500
Subject: svcrdma: Simplify RDMA_READ deferral buffer management

An NFS_WRITE requires a set of RDMA_READ requests to fetch the write
data from the client. There are two principal pieces of data that
need to be tracked: the list of pages that comprise the completed RPC
and the SGE of dma mapped pages to refer to this list of pages. Previously
this whole bit was managed as a linked list of contexts with the
context containing the page list buried in this list. This patch
simplifies this processing by not keeping a linked list, but rather only
a pionter from the last submitted RDMA_READ's context to the context
that maps the set of pages that describe the RPC.  This significantly
simplifies this code path. SGE contexts are cleaned up inline in the DTO
path instead of at read completion time.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |  1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 58 ++++++--------------------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  5 ++-
 3 files changed, 16 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d0011f3db90c..c447c417b37b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -71,6 +71,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
+	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_op_ctxt *next;
 	struct xdr_buf arg;
 	struct list_head dto_q;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 80c6ee82c34b..21a1e625ef03 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -289,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *head;
 	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
 	struct chunk_sge *ch_sge_ary;
@@ -310,20 +309,13 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
 				    sge, ch_sge_ary,
 				    ch_count, byte_count);
-	head = svc_rdma_get_context(xprt);
 	sgl_offset = 0;
 	ch_no = 0;
 
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
 	     ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-		if (!ctxt)
-			ctxt = head;
-		else {
-			ctxt->next = svc_rdma_get_context(xprt);
-			ctxt = ctxt->next;
-		}
-		ctxt->next = NULL;
+		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
@@ -351,20 +343,15 @@ next_sge:
 			 * the client and the RPC needs to be enqueued.
 			 */
 			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			ctxt->next = hdr_ctxt;
-			hdr_ctxt->next = head;
+			ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
 		err = svc_rdma_send(xprt, &read_wr);
 		if (err) {
-			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
 			       err);
-			/*
-			 * Break the circular list so free knows when
-			 * to stop if the error happened to occur on
-			 * the last read
-			 */
-			ctxt->next = NULL;
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_rdma_put_context(ctxt, 0);
 			goto out;
 		}
 		atomic_inc(&rdma_stat_read);
@@ -375,7 +362,7 @@ next_sge:
 			goto next_sge;
 		}
 		sgl_offset = 0;
-		err = 0;
+		err = 1;
 	}
 
  out:
@@ -393,25 +380,12 @@ next_sge:
 	while (rqstp->rq_resused)
 		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
 
-	if (err) {
-		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		/* Free the linked list of read contexts */
-		while (head != NULL) {
-			ctxt = head->next;
-			svc_rdma_put_context(head, 1);
-			head = ctxt;
-		}
-		return err;
-	}
-
-	return 1;
+	return err;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *data)
+			      struct svc_rdma_op_ctxt *head)
 {
-	struct svc_rdma_op_ctxt *head = data->next;
 	int page_no;
 	int ret;
 
@@ -437,22 +411,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.len = head->arg.len;
 	rqstp->rq_arg.buflen = head->arg.buflen;
 
+	/* Free the context */
+	svc_rdma_put_context(head, 0);
+
 	/* XXX: What should this be? */
 	rqstp->rq_prot = IPPROTO_MAX;
 
-	/*
-	 * Free the contexts we used to build the RDMA_READ. We have
-	 * to be careful here because the context list uses the same
-	 * next pointer used to chain the contexts associated with the
-	 * RDMA_READ
-	 */
-	data->next = NULL;	/* terminate circular list */
-	do {
-		data = head->next;
-		svc_rdma_put_context(head, 0);
-		head = data;
-	} while (head != NULL);
-
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
 		+ rqstp->rq_arg.tail[0].iov_len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4a79dfda1465..34141eaf25a0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -352,13 +352,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+				BUG_ON(!read_hdr);
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 				spin_lock_bh(&xprt->sc_read_complete_lock);
-				list_add_tail(&ctxt->dto_q,
+				list_add_tail(&read_hdr->dto_q,
 					      &xprt->sc_read_complete_q);
 				spin_unlock_bh(&xprt->sc_read_complete_lock);
 				svc_xprt_enqueue(&xprt->sc_xprt);
 			}
+			svc_rdma_put_context(ctxt, 0);
 			break;
 
 		default:
-- 
cgit v1.2.3


From 8740767376b32a7772607e1b2b07cde0c24120cc Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 30 Apr 2008 20:44:39 -0500
Subject: svcrdma: Use standard Linux lists for context cache

Replace the one-off linked list implementation used to implement the
context cache with the standard Linux list_head lists. Add a context
counter to catch resource leaks. A WARN_ON will be added later to
ensure that we've freed all contexts.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |  5 ++--
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 47 ++++++++++++++++++--------------
 2 files changed, 29 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index c447c417b37b..701439064d21 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,7 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
-	struct svc_rdma_op_ctxt *next;
+	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
 	enum ib_wr_opcode wr_op;
@@ -104,7 +104,8 @@ struct svcxprt_rdma {
 
 	struct ib_pd         *sc_pd;
 
-	struct svc_rdma_op_ctxt  *sc_ctxt_head;
+	atomic_t	     sc_ctxt_used;
+	struct list_head     sc_ctxt_free;
 	int		     sc_ctxt_cnt;
 	int		     sc_ctxt_bump;
 	int		     sc_ctxt_max;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 34141eaf25a0..817cf4de746c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
 		spin_lock_bh(&xprt->sc_ctxt_lock);
 		if (ctxt) {
 			at_least_one = 1;
-			ctxt->next = xprt->sc_ctxt_head;
-			xprt->sc_ctxt_head = ctxt;
+			INIT_LIST_HEAD(&ctxt->free_list);
+			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 		} else {
 			/* kmalloc failed...give up for now */
 			xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 
 	while (1) {
 		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (unlikely(xprt->sc_ctxt_head == NULL)) {
+		if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
 			/* Try to bump my cache. */
 			spin_unlock_bh(&xprt->sc_ctxt_lock);
 
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 			continue;
 		}
-		ctxt = xprt->sc_ctxt_head;
-		xprt->sc_ctxt_head = ctxt->next;
+		ctxt = list_entry(xprt->sc_ctxt_free.next,
+				  struct svc_rdma_op_ctxt,
+				  free_list);
+		list_del_init(&ctxt->free_list);
 		spin_unlock_bh(&xprt->sc_ctxt_lock);
 		ctxt->xprt = xprt;
 		INIT_LIST_HEAD(&ctxt->dto_q);
 		ctxt->count = 0;
+		atomic_inc(&xprt->sc_ctxt_used);
 		break;
 	}
 	return ctxt;
@@ -163,10 +166,11 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 				 ctxt->sge[i].addr,
 				 ctxt->sge[i].length,
 				 ctxt->direction);
+
 	spin_lock_bh(&xprt->sc_ctxt_lock);
-	ctxt->next = xprt->sc_ctxt_head;
-	xprt->sc_ctxt_head = ctxt;
+	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 	spin_unlock_bh(&xprt->sc_ctxt_lock);
+	atomic_dec(&xprt->sc_ctxt_used);
 }
 
 /* ib_cq event handler */
@@ -412,28 +416,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
 	xprt->sc_ctxt_max = ctxt_max;
 	xprt->sc_ctxt_bump = ctxt_bump;
 	xprt->sc_ctxt_cnt = 0;
-	xprt->sc_ctxt_head = NULL;
+	atomic_set(&xprt->sc_ctxt_used, 0);
+
+	INIT_LIST_HEAD(&xprt->sc_ctxt_free);
 	for (i = 0; i < ctxt_count; i++) {
 		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
 		if (ctxt) {
-			ctxt->next = xprt->sc_ctxt_head;
-			xprt->sc_ctxt_head = ctxt;
+			INIT_LIST_HEAD(&ctxt->free_list);
+			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 			xprt->sc_ctxt_cnt++;
 		}
 	}
 }
 
-static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
+static void destroy_context_cache(struct svcxprt_rdma *xprt)
 {
-	struct svc_rdma_op_ctxt *next;
-	if (!ctxt)
-		return;
-
-	do {
-		next = ctxt->next;
+	while (!list_empty(&xprt->sc_ctxt_free)) {
+		struct svc_rdma_op_ctxt *ctxt;
+		ctxt = list_entry(xprt->sc_ctxt_free.next,
+				  struct svc_rdma_op_ctxt,
+				  free_list);
+		list_del_init(&ctxt->free_list);
 		kfree(ctxt);
-		ctxt = next;
-	} while (next);
+	}
 }
 
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -470,7 +475,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 				     reqs +
 				     cma_xprt->sc_sq_depth +
 				     RPCRDMA_MAX_THREADS + 1); /* max */
-		if (!cma_xprt->sc_ctxt_head) {
+		if (list_empty(&cma_xprt->sc_ctxt_free)) {
 			kfree(cma_xprt);
 			return NULL;
 		}
@@ -976,7 +981,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
 		ib_dealloc_pd(rdma->sc_pd);
 
-	destroy_context_cache(rdma->sc_ctxt_head);
+	destroy_context_cache(rdma);
 	kfree(rdma);
 }
 
-- 
cgit v1.2.3


From 8da91ea8de873ee8be82377ff18637d05e882058 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 30 Apr 2008 22:00:46 -0500
Subject: svcrdma: Move destroy to kernel thread

Some providers may wait while destroying adapter resources.
Since it is possible that the last reference is put on the
dto_tasklet, the actual destroy must be scheduled as a work item.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |  1 +
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 701439064d21..f5f15ae2438b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -124,6 +124,7 @@ struct svcxprt_rdma {
 	struct list_head     sc_dto_q;		/* DTO tasklet I/O pending Q */
 	struct list_head     sc_read_complete_q;
 	spinlock_t           sc_read_complete_lock;
+	struct work_struct   sc_work;
 };
 /* sc_flags */
 #define RDMAXPRT_RQ_PENDING	1
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 78303f0fad92..028c6cf89364 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -963,12 +963,15 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
 	rdma_destroy_id(rdma->sc_cm_id);
 }
 
-static void svc_rdma_free(struct svc_xprt *xprt)
+static void __svc_rdma_free(struct work_struct *work)
 {
-	struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
+	struct svcxprt_rdma *rdma =
+		container_of(work, struct svcxprt_rdma, sc_work);
 	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+
 	/* We should only be called from kref_put */
-	BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+	BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+
 	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
 		ib_destroy_cq(rdma->sc_sq_cq);
 
@@ -985,6 +988,14 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 	kfree(rdma);
 }
 
+static void svc_rdma_free(struct svc_xprt *xprt)
+{
+	struct svcxprt_rdma *rdma =
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
+	schedule_work(&rdma->sc_work);
+}
+
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
-- 
cgit v1.2.3


From 008fdbc57164b0ac237ad6ee2766944f02ac9c28 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 7 May 2008 15:47:42 -0500
Subject: svcrdma: Change svc_rdma_send_error return type to void

The svc_rdma_send_error function is called when an RPCRDMA protocol
error is detected. This function attempts to post an error reply message.
Since an error posting to a transport in error is ignored, change
the return type to void.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 4 ++--
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 2 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f5f15ae2438b..05eb4664d0dd 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -166,8 +166,8 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 
 /* svc_rdma_transport.c */
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
-extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
-			       enum rpcrdma_errcode);
+extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
+				enum rpcrdma_errcode);
 struct page *svc_rdma_get_page(void);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c016f5ca0ce5..6b16d8cd5682 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -497,7 +497,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	/* If the request is invalid, reply with an error */
 	if (len < 0) {
 		if (len == -ENOSYS)
-			(void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
 		goto close_out;
 	}
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c7545203f4b3..e132509d1db0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1114,8 +1114,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 	return ret;
 }
 
-int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
-			enum rpcrdma_errcode err)
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+			 enum rpcrdma_errcode err)
 {
 	struct ib_send_wr err_wr;
 	struct ib_sge sge;
@@ -1153,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	/* Post It */
 	ret = svc_rdma_send(xprt, &err_wr);
 	if (ret) {
-		dprintk("svcrdma: Error posting send = %d\n", ret);
+		dprintk("svcrdma: Error %d posting send for protocol error\n",
+			ret);
 		svc_rdma_put_context(ctxt, 1);
 	}
-
-	return ret;
 }
-- 
cgit v1.2.3


From c3cc3bd0d36d1b16d4cb17e8fc64fff613f0b902 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sat, 17 May 2008 16:58:28 -0400
Subject: dlm: <linux/dlm_plock.h> should be "unifdef"ed.

Given that <linux/dlm_plock.h> contains a conditional __KERNEL__ test,
it should be moved from header-y to unifdef-y.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 include/linux/Kbuild | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b7d81b2a9041..5dfa739045c8 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -105,7 +105,6 @@ header-y += ixjuser.h
 header-y += jffs2.h
 header-y += keyctl.h
 header-y += limits.h
-header-y += dlm_plock.h
 header-y += magic.h
 header-y += major.h
 header-y += matroxfb.h
@@ -190,6 +189,7 @@ unifdef-y += cyclades.h
 unifdef-y += dccp.h
 unifdef-y += dirent.h
 unifdef-y += dlm.h
+unifdef-y += dlm_plock.h
 unifdef-y += edd.h
 unifdef-y += elf.h
 unifdef-y += elfcore.h
-- 
cgit v1.2.3


From e6da97e7df385a1674cf9f72c31b7a0e46e2620d Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Mon, 19 May 2008 14:13:11 -0700
Subject: ipv6: Move <linux/in6.h> from header-y to unifdef-y.

Given that <linux/in6.h> contains a __KERNEL__ test, it should be
unifdef-ed.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b7d81b2a9041..54a4cfb50ed0 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -92,7 +92,6 @@ header-y += if_slip.h
 header-y += if_strip.h
 header-y += if_tun.h
 header-y += if_tunnel.h
-header-y += in6.h
 header-y += in_route.h
 header-y += ioctl.h
 header-y += ip6_tunnel.h
@@ -236,6 +235,7 @@ unifdef-y += if_vlan.h
 unifdef-y += igmp.h
 unifdef-y += inet_diag.h
 unifdef-y += in.h
+unifdef-y += in6.h
 unifdef-y += inotify.h
 unifdef-y += input.h
 unifdef-y += ip.h
-- 
cgit v1.2.3


From 07633b5d0723ce2ec31262e1096dcf61311bf078 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:17:00 -0700
Subject: ata: remove FIT() macro

Use the kernel-provided clamp_val() macro.

FIT was always applied to a member of struct ata_timing (unsigned short)
and two constants.  clamp_val will not cast to short anymore.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/pata_ali.c     | 10 +++++-----
 drivers/ata/pata_amd.c     | 14 ++++++-------
 drivers/ata/pata_cypress.c |  8 ++++----
 drivers/ata/pata_legacy.c  | 50 +++++++++++++++++++++++-----------------------
 drivers/ata/pata_ns87410.c |  6 +++---
 drivers/ata/pata_ns87415.c |  4 ++--
 drivers/ata/pata_qdi.c     | 16 +++++++--------
 drivers/ata/pata_via.c     | 14 ++++++-------
 drivers/ata/pata_winbond.c |  6 +++---
 include/linux/libata.h     |  2 --
 10 files changed, 64 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index fcabe46f262b..0f3e659db99a 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -177,11 +177,11 @@ static void ali_program_modes(struct ata_port *ap, struct ata_device *adev, stru
 	u8 udma;
 
 	if (t != NULL) {
-		t->setup = FIT(t->setup, 1, 8) & 7;
-		t->act8b = FIT(t->act8b, 1, 8) & 7;
-		t->rec8b = FIT(t->rec8b, 1, 16) & 15;
-		t->active = FIT(t->active, 1, 8) & 7;
-		t->recover = FIT(t->recover, 1, 16) & 15;
+		t->setup = clamp_val(t->setup, 1, 8) & 7;
+		t->act8b = clamp_val(t->act8b, 1, 8) & 7;
+		t->rec8b = clamp_val(t->rec8b, 1, 16) & 15;
+		t->active = clamp_val(t->active, 1, 8) & 7;
+		t->recover = clamp_val(t->recover, 1, 16) & 15;
 
 		pci_write_config_byte(pdev, cas, t->setup);
 		pci_write_config_byte(pdev, cbt, (t->act8b << 4) | t->rec8b);
diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c
index 26665c396485..57dd00f463d3 100644
--- a/drivers/ata/pata_amd.c
+++ b/drivers/ata/pata_amd.c
@@ -84,32 +84,32 @@ static void timing_setup(struct ata_port *ap, struct ata_device *adev, int offse
 
 	/* Configure the address set up timing */
 	pci_read_config_byte(pdev, offset + 0x0C, &t);
-	t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(at.setup, 1, 4) - 1) << ((3 - dn) << 1));
+	t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(at.setup, 1, 4) - 1) << ((3 - dn) << 1));
 	pci_write_config_byte(pdev, offset + 0x0C , t);
 
 	/* Configure the 8bit I/O timing */
 	pci_write_config_byte(pdev, offset + 0x0E + (1 - (dn >> 1)),
-		((FIT(at.act8b, 1, 16) - 1) << 4) | (FIT(at.rec8b, 1, 16) - 1));
+		((clamp_val(at.act8b, 1, 16) - 1) << 4) | (clamp_val(at.rec8b, 1, 16) - 1));
 
 	/* Drive timing */
 	pci_write_config_byte(pdev, offset + 0x08 + (3 - dn),
-		((FIT(at.active, 1, 16) - 1) << 4) | (FIT(at.recover, 1, 16) - 1));
+		((clamp_val(at.active, 1, 16) - 1) << 4) | (clamp_val(at.recover, 1, 16) - 1));
 
 	switch (clock) {
 		case 1:
-		t = at.udma ? (0xc0 | (FIT(at.udma, 2, 5) - 2)) : 0x03;
+		t = at.udma ? (0xc0 | (clamp_val(at.udma, 2, 5) - 2)) : 0x03;
 		break;
 
 		case 2:
-		t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 2, 10)]) : 0x03;
+		t = at.udma ? (0xc0 | amd_cyc2udma[clamp_val(at.udma, 2, 10)]) : 0x03;
 		break;
 
 		case 3:
-		t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 1, 10)]) : 0x03;
+		t = at.udma ? (0xc0 | amd_cyc2udma[clamp_val(at.udma, 1, 10)]) : 0x03;
 		break;
 
 		case 4:
-		t = at.udma ? (0xc0 | amd_cyc2udma[FIT(at.udma, 1, 15)]) : 0x03;
+		t = at.udma ? (0xc0 | amd_cyc2udma[clamp_val(at.udma, 1, 15)]) : 0x03;
 		break;
 
 		default:
diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c
index a9c3218e22fd..2ff62608ae37 100644
--- a/drivers/ata/pata_cypress.c
+++ b/drivers/ata/pata_cypress.c
@@ -62,14 +62,14 @@ static void cy82c693_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		return;
 	}
 
-	time_16 = FIT(t.recover, 0, 15) | (FIT(t.active, 0, 15) << 4);
-	time_8 = FIT(t.act8b, 0, 15) | (FIT(t.rec8b, 0, 15) << 4);
+	time_16 = clamp_val(t.recover, 0, 15) | (clamp_val(t.active, 0, 15) << 4);
+	time_8 = clamp_val(t.act8b, 0, 15) | (clamp_val(t.rec8b, 0, 15) << 4);
 
 	if (adev->devno == 0) {
 		pci_read_config_dword(pdev, CY82_IDE_ADDRSETUP, &addr);
 
 		addr &= ~0x0F;	/* Mask bits */
-		addr |= FIT(t.setup, 0, 15);
+		addr |= clamp_val(t.setup, 0, 15);
 
 		pci_write_config_dword(pdev, CY82_IDE_ADDRSETUP, addr);
 		pci_write_config_byte(pdev, CY82_IDE_MASTER_IOR, time_16);
@@ -79,7 +79,7 @@ static void cy82c693_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		pci_read_config_dword(pdev, CY82_IDE_ADDRSETUP, &addr);
 
 		addr &= ~0xF0;	/* Mask bits */
-		addr |= (FIT(t.setup, 0, 15) << 4);
+		addr |= (clamp_val(t.setup, 0, 15) << 4);
 
 		pci_write_config_dword(pdev, CY82_IDE_ADDRSETUP, addr);
 		pci_write_config_byte(pdev, CY82_IDE_SLAVE_IOR, time_16);
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 7af4b29cc422..fe7cc8ed4ea4 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -343,8 +343,8 @@ static void ht6560a_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	/* Get the timing data in cycles. For now play safe at 50Mhz */
 	ata_timing_compute(adev, adev->pio_mode, &t, 20000, 1000);
 
-	active = FIT(t.active, 2, 15);
-	recover = FIT(t.recover, 4, 15);
+	active = clamp_val(t.active, 2, 15);
+	recover = clamp_val(t.recover, 4, 15);
 
 	inb(0x3E6);
 	inb(0x3E6);
@@ -377,8 +377,8 @@ static void ht6560b_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	/* Get the timing data in cycles. For now play safe at 50Mhz */
 	ata_timing_compute(adev, adev->pio_mode, &t, 20000, 1000);
 
-	active = FIT(t.active, 2, 15);
-	recover = FIT(t.recover, 2, 16);
+	active = clamp_val(t.active, 2, 15);
+	recover = clamp_val(t.recover, 2, 16);
 	recover &= 0x15;
 
 	inb(0x3E6);
@@ -462,9 +462,9 @@ static void opti82c611a_set_piomode(struct ata_port *ap,
 		ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP);
 	}
 
-	active = FIT(t.active, 2, 17) - 2;
-	recover = FIT(t.recover, 1, 16) - 1;
-	setup = FIT(t.setup, 1, 4) - 1;
+	active = clamp_val(t.active, 2, 17) - 2;
+	recover = clamp_val(t.recover, 1, 16) - 1;
+	setup = clamp_val(t.setup, 1, 4) - 1;
 
 	/* Select the right timing bank for write timing */
 	rc = ioread8(ap->ioaddr.lbal_addr);
@@ -541,9 +541,9 @@ static void opti82c46x_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		ata_timing_merge(&t, &tp, &t, ATA_TIMING_SETUP);
 	}
 
-	active = FIT(t.active, 2, 17) - 2;
-	recover = FIT(t.recover, 1, 16) - 1;
-	setup = FIT(t.setup, 1, 4) - 1;
+	active = clamp_val(t.active, 2, 17) - 2;
+	recover = clamp_val(t.recover, 1, 16) - 1;
+	setup = clamp_val(t.setup, 1, 4) - 1;
 
 	/* Select the right timing bank for write timing */
 	rc = ioread8(ap->ioaddr.lbal_addr);
@@ -624,11 +624,11 @@ static void qdi6500_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
 	if (ld_qdi->fast) {
-		active = 8 - FIT(t.active, 1, 8);
-		recovery = 18 - FIT(t.recover, 3, 18);
+		active = 8 - clamp_val(t.active, 1, 8);
+		recovery = 18 - clamp_val(t.recover, 3, 18);
 	} else {
-		active = 9 - FIT(t.active, 2, 9);
-		recovery = 15 - FIT(t.recover, 0, 15);
+		active = 9 - clamp_val(t.active, 2, 9);
+		recovery = 15 - clamp_val(t.recover, 0, 15);
 	}
 	timing = (recovery << 4) | active | 0x08;
 
@@ -658,11 +658,11 @@ static void qdi6580dp_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
 	if (ld_qdi->fast) {
-		active = 8 - FIT(t.active, 1, 8);
-		recovery = 18 - FIT(t.recover, 3, 18);
+		active = 8 - clamp_val(t.active, 1, 8);
+		recovery = 18 - clamp_val(t.recover, 3, 18);
 	} else {
-		active = 9 - FIT(t.active, 2, 9);
-		recovery = 15 - FIT(t.recover, 0, 15);
+		active = 9 - clamp_val(t.active, 2, 9);
+		recovery = 15 - clamp_val(t.recover, 0, 15);
 	}
 	timing = (recovery << 4) | active | 0x08;
 
@@ -695,11 +695,11 @@ static void qdi6580_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
 	if (ld_qdi->fast) {
-		active = 8 - FIT(t.active, 1, 8);
-		recovery = 18 - FIT(t.recover, 3, 18);
+		active = 8 - clamp_val(t.active, 1, 8);
+		recovery = 18 - clamp_val(t.recover, 3, 18);
 	} else {
-		active = 9 - FIT(t.active, 2, 9);
-		recovery = 15 - FIT(t.recover, 0, 15);
+		active = 9 - clamp_val(t.active, 2, 9);
+		recovery = 15 - clamp_val(t.recover, 0, 15);
 	}
 	timing = (recovery << 4) | active | 0x08;
 	ld_qdi->clock[adev->devno] = timing;
@@ -830,8 +830,8 @@ static void winbond_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	else
 		ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
-	active = (FIT(t.active, 3, 17) - 1) & 0x0F;
-	recovery = (FIT(t.recover, 1, 15) + 1) & 0x0F;
+	active = (clamp_val(t.active, 3, 17) - 1) & 0x0F;
+	recovery = (clamp_val(t.recover, 1, 15) + 1) & 0x0F;
 	timing = (active << 4) | recovery;
 	winbond_writecfg(ld_winbond->timing, timing, reg);
 
@@ -842,7 +842,7 @@ static void winbond_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		reg |= 0x08;	/* FIFO off */
 	if (!ata_pio_need_iordy(adev))
 		reg |= 0x02;	/* IORDY off */
-	reg |= (FIT(t.setup, 0, 3) << 6);
+	reg |= (clamp_val(t.setup, 0, 3) << 6);
 	winbond_writecfg(ld_winbond->timing, timing + 1, reg);
 }
 
diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c
index 76d2455bc453..be756b7ef07e 100644
--- a/drivers/ata/pata_ns87410.c
+++ b/drivers/ata/pata_ns87410.c
@@ -91,9 +91,9 @@ static void ns87410_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		return;
 	}
 
-	at.active = FIT(at.active, 2, 16) - 2;
-	at.setup = FIT(at.setup, 1, 4) - 1;
-	at.recover = FIT(at.recover, 1, 12) - 1;
+	at.active = clamp_val(at.active, 2, 16) - 2;
+	at.setup = clamp_val(at.setup, 1, 4) - 1;
+	at.recover = clamp_val(at.recover, 1, 12) - 1;
 
 	idetcr = (at.setup << 6) | (recoverbits[at.recover] << 3) | activebits[at.active];
 
diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index ae92b0049bd5..e0aa7eaaee0a 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c
@@ -66,8 +66,8 @@ static void ns87415_set_mode(struct ata_port *ap, struct ata_device *adev, u8 mo
 
 	ata_timing_compute(adev, adev->pio_mode, &t, T, 0);
 
-	clocking = 17 - FIT(t.active, 2, 17);
-	clocking |= (16 - FIT(t.recover, 1, 16)) << 4;
+	clocking = 17 - clamp_val(t.active, 2, 17);
+	clocking |= (16 - clamp_val(t.recover, 1, 16)) << 4;
  	/* Use the same timing for read and write bytes */
 	clocking |= (clocking << 8);
 	pci_write_config_word(dev, timing, clocking);
diff --git a/drivers/ata/pata_qdi.c b/drivers/ata/pata_qdi.c
index bf45cf017753..97e5b090d7c2 100644
--- a/drivers/ata/pata_qdi.c
+++ b/drivers/ata/pata_qdi.c
@@ -60,11 +60,11 @@ static void qdi6500_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
 	if (qdi->fast) {
-		active = 8 - FIT(t.active, 1, 8);
-		recovery = 18 - FIT(t.recover, 3, 18);
+		active = 8 - clamp_val(t.active, 1, 8);
+		recovery = 18 - clamp_val(t.recover, 3, 18);
 	} else {
-		active = 9 - FIT(t.active, 2, 9);
-		recovery = 15 - FIT(t.recover, 0, 15);
+		active = 9 - clamp_val(t.active, 2, 9);
+		recovery = 15 - clamp_val(t.recover, 0, 15);
 	}
 	timing = (recovery << 4) | active | 0x08;
 
@@ -84,11 +84,11 @@ static void qdi6580_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
 	if (qdi->fast) {
-		active = 8 - FIT(t.active, 1, 8);
-		recovery = 18 - FIT(t.recover, 3, 18);
+		active = 8 - clamp_val(t.active, 1, 8);
+		recovery = 18 - clamp_val(t.recover, 3, 18);
 	} else {
-		active = 9 - FIT(t.active, 2, 9);
-		recovery = 15 - FIT(t.recover, 0, 15);
+		active = 9 - clamp_val(t.active, 2, 9);
+		recovery = 15 - clamp_val(t.recover, 0, 15);
 	}
 	timing = (recovery << 4) | active | 0x08;
 
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 2fea6cbe7755..708ed144ede9 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -259,15 +259,15 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev, int mo
 
 		pci_read_config_byte(pdev, 0x4C, &setup);
 		setup &= ~(3 << shift);
-		setup |= FIT(t.setup, 1, 4) << shift;	/* 1,4 or 1,4 - 1  FIXME */
+		setup |= clamp_val(t.setup, 1, 4) << shift;	/* 1,4 or 1,4 - 1  FIXME */
 		pci_write_config_byte(pdev, 0x4C, setup);
 	}
 
 	/* Load the PIO mode bits */
 	pci_write_config_byte(pdev, 0x4F - ap->port_no,
-		((FIT(t.act8b, 1, 16) - 1) << 4) | (FIT(t.rec8b, 1, 16) - 1));
+		((clamp_val(t.act8b, 1, 16) - 1) << 4) | (clamp_val(t.rec8b, 1, 16) - 1));
 	pci_write_config_byte(pdev, 0x48 + offset,
-		((FIT(t.active, 1, 16) - 1) << 4) | (FIT(t.recover, 1, 16) - 1));
+		((clamp_val(t.active, 1, 16) - 1) << 4) | (clamp_val(t.recover, 1, 16) - 1));
 
 	/* Load the UDMA bits according to type */
 	switch(udma_type) {
@@ -275,16 +275,16 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev, int mo
 			/* BUG() ? */
 			/* fall through */
 		case 33:
-			ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 5) - 2)) : 0x03;
+			ut = t.udma ? (0xe0 | (clamp_val(t.udma, 2, 5) - 2)) : 0x03;
 			break;
 		case 66:
-			ut = t.udma ? (0xe8 | (FIT(t.udma, 2, 9) - 2)) : 0x0f;
+			ut = t.udma ? (0xe8 | (clamp_val(t.udma, 2, 9) - 2)) : 0x0f;
 			break;
 		case 100:
-			ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 9) - 2)) : 0x07;
+			ut = t.udma ? (0xe0 | (clamp_val(t.udma, 2, 9) - 2)) : 0x07;
 			break;
 		case 133:
-			ut = t.udma ? (0xe0 | (FIT(t.udma, 2, 9) - 2)) : 0x07;
+			ut = t.udma ? (0xe0 | (clamp_val(t.udma, 2, 9) - 2)) : 0x07;
 			break;
 	}
 
diff --git a/drivers/ata/pata_winbond.c b/drivers/ata/pata_winbond.c
index 6e52a3573fbf..474528f8fe3d 100644
--- a/drivers/ata/pata_winbond.c
+++ b/drivers/ata/pata_winbond.c
@@ -75,8 +75,8 @@ static void winbond_set_piomode(struct ata_port *ap, struct ata_device *adev)
 	else
 		ata_timing_compute(adev, adev->pio_mode, &t, 30303, 1000);
 
-	active = (FIT(t.active, 3, 17) - 1) & 0x0F;
-	recovery = (FIT(t.recover, 1, 15) + 1) & 0x0F;
+	active = (clamp_val(t.active, 3, 17) - 1) & 0x0F;
+	recovery = (clamp_val(t.recover, 1, 15) + 1) & 0x0F;
 	timing = (active << 4) | recovery;
 	winbond_writecfg(winbond->config, timing, reg);
 
@@ -87,7 +87,7 @@ static void winbond_set_piomode(struct ata_port *ap, struct ata_device *adev)
 		reg |= 0x08;	/* FIFO off */
 	if (!ata_pio_need_iordy(adev))
 		reg |= 0x02;	/* IORDY off */
-	reg |= (FIT(t.setup, 0, 3) << 6);
+	reg |= (clamp_val(t.setup, 0, 3) << 6);
 	winbond_writecfg(winbond->config, timing + 1, reg);
 }
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0f17643e0a6e..07ec193fc941 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -821,8 +821,6 @@ struct ata_timing {
 	unsigned short udma;		/* t2CYCTYP/2 */
 };
 
-#define FIT(v, vmin, vmax)	max_t(short, min_t(short, v, vmax), vmin)
-
 /*
  * Core layer - drivers/ata/libata-core.c
  */
-- 
cgit v1.2.3


From bf1bff6fa9fdd4e92e57d80a5434fd5201c051fc Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 19 May 2008 01:15:10 +0900
Subject: libata: increase PMP register access timeout to 3s

This timeout was set low because previously PMP register access was
done via polling and register access timeouts could stack up.  This is
no longer the case.  One timeout will make all following accesses fail
immediately.

In rare cases both marvell and SIMG PMPs need almost a second.  Bump
it to 3s.

While at it, rename it to SATA_PMP_RW_TIMEOUT.  It's not specific to
SCR access.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-pmp.c | 4 ++--
 include/linux/libata.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index f3ad024394c2..04a486a3e7b8 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -48,7 +48,7 @@ static unsigned int sata_pmp_read(struct ata_link *link, int reg, u32 *r_val)
 	tf.device = link->pmp;
 
 	err_mask = ata_exec_internal(pmp_dev, &tf, NULL, DMA_NONE, NULL, 0,
-				     SATA_PMP_SCR_TIMEOUT);
+				     SATA_PMP_RW_TIMEOUT);
 	if (err_mask)
 		return err_mask;
 
@@ -88,7 +88,7 @@ static unsigned int sata_pmp_write(struct ata_link *link, int reg, u32 val)
 	tf.lbah = (val >> 24) & 0xff;
 
 	return ata_exec_internal(pmp_dev, &tf, NULL, DMA_NONE, NULL, 0,
-				 SATA_PMP_SCR_TIMEOUT);
+				 SATA_PMP_RW_TIMEOUT);
 }
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 07ec193fc941..8d6999da1d3e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -341,7 +341,7 @@ enum {
 	ATA_EH_PMP_TRIES	= 5,
 	ATA_EH_PMP_LINK_TRIES	= 3,
 
-	SATA_PMP_SCR_TIMEOUT	= 250,
+	SATA_PMP_RW_TIMEOUT	= 3000,		/* PMP read/write timeout */
 
 	/* Horkage types. May be set by libata or controller on drives
 	   (some horkage may be drive/controller pair dependant */
-- 
cgit v1.2.3


From 50af2fa1e18d0ab411d06bf727ecadb7e01721e9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 19 May 2008 01:15:14 +0900
Subject: libata: ignore SIMG4726 config pseudo device

I was hoping ATA_HORKAGE_NODMA | ATA_HORKAGE_SKIP_PM could keep it
happy but no even this doesn't work under certain configurations and
it's not like we can do anything useful with the cofig device anyway.
Replace ATA_HORKAGE_SKIP_PM with ATA_HORKAGE_DISABLE and use it for
the config device.  This makes the device completely ignored by
libata.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 10 ++++++++--
 drivers/ata/libata-scsi.c |  6 ------
 include/linux/libata.h    |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index a12a27eb8c77..3c89f205c83f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2126,6 +2126,13 @@ int ata_dev_configure(struct ata_device *dev)
 	dev->horkage |= ata_dev_blacklisted(dev);
 	ata_force_horkage(dev);
 
+	if (dev->horkage & ATA_HORKAGE_DISABLE) {
+		ata_dev_printk(dev, KERN_INFO,
+			       "unsupported device, disabling\n");
+		ata_dev_disable(dev);
+		return 0;
+	}
+
 	/* let ACPI work its magic */
 	rc = ata_acpi_on_devcfg(dev);
 	if (rc)
@@ -3893,8 +3900,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "SAMSUNG CD-ROM SN-124", "N001",	ATA_HORKAGE_NODMA },
 	{ "Seagate STT20000A", NULL,		ATA_HORKAGE_NODMA },
 	/* Odd clown on sil3726/4726 PMPs */
-	{ "Config  Disk",	NULL,		ATA_HORKAGE_NODMA |
-						ATA_HORKAGE_SKIP_PM },
+	{ "Config  Disk",	NULL,		ATA_HORKAGE_DISABLE },
 
 	/* Weird ATAPI devices */
 	{ "TORiSAN DVD-ROM DRD-N216", NULL,	ATA_HORKAGE_MAX_SEC_128 },
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3ce43920e459..aeb6e01d82ce 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1082,12 +1082,6 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc)
 	if (((cdb[4] >> 4) & 0xf) != 0)
 		goto invalid_fld;       /* power conditions not supported */
 
-	if (qc->dev->horkage & ATA_HORKAGE_SKIP_PM) {
-		/* the device lacks PM support, finish without doing anything */
-		scmd->result = SAM_STAT_GOOD;
-		return 1;
-	}
-
 	if (cdb[4] & 0x1) {
 		tf->nsect = 1;	/* 1 sector, lba=0 */
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8d6999da1d3e..4a92fbafce9d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -351,7 +351,7 @@ enum {
 	ATA_HORKAGE_NONCQ	= (1 << 2),	/* Don't use NCQ */
 	ATA_HORKAGE_MAX_SEC_128	= (1 << 3),	/* Limit max sects to 128 */
 	ATA_HORKAGE_BROKEN_HPA	= (1 << 4),	/* Broken HPA */
-	ATA_HORKAGE_SKIP_PM	= (1 << 5),	/* Skip PM operations */
+	ATA_HORKAGE_DISABLE	= (1 << 5),	/* Disable it */
 	ATA_HORKAGE_HPA_SIZE	= (1 << 6),	/* native size off by one */
 	ATA_HORKAGE_IPM		= (1 << 7),	/* Link PM problems */
 	ATA_HORKAGE_IVB		= (1 << 8),	/* cbl det validity bit bugs */
-- 
cgit v1.2.3


From 6e7045990f35ef9250804b3fd85e855b8c2aaeb6 Mon Sep 17 00:00:00 2001
From: Diego 'Flameeyes' Petteno <flameeyes@gmail.com>
Date: Mon, 5 May 2008 16:20:50 +0200
Subject: HID: split Numlock emulation quirk from HID_QUIRK_APPLE_HAS_FN.

Since 2.6.25 the HID_QUIRK_APPLE_HAS_FN quirk is enabled even for
non-laptop Apple keyboards of the Aluminium series. The USB version of
these don't need Numlock emulation, like the laptop (and Aluminium
Wireless) do, as they have a proper keypad.

This patch splits the Numlock emulation for Apple keyboards in a
different quirk flag, so that it can be enabled for all the keyboards
but the Aluminium USB ones.

If the Numlock emulation is enabled for Aluminium USB keyboards, the
JKL and UIO keys become the numeric pad, and the rest of the keyboard
is disabled, included the key used to disable Numlock.

Additionally, these keyboard should not have a Numlock at all, as the
Numlock key is instead replaced by the 'Clear' key as usual for Apple
USB keyboards.

Signed-off-by: Diego 'Flameeyes' Petteno <flameeyes@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c         |  5 +++--
 drivers/hid/usbhid/hid-quirks.c | 38 +++++++++++++++++++-------------------
 include/linux/hid.h             |  1 +
 3 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index c3eb3f13e2ca..452b94dd7401 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -218,8 +218,9 @@ int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 			}
 		}
 
-		if (test_bit(usage->code, hid->pb_pressed_numlock) ||
-				test_bit(LED_NUML, input->led)) {
+		if (hid->quirks & HID_QUIRK_APPLE_NUMLOCK_EMULATION && (
+				test_bit(usage->code, hid->pb_pressed_numlock) ||
+				test_bit(LED_NUML, input->led))) {
 			trans = find_translation(powerbook_numlock_keys, usage->code);
 
 			if (trans) {
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index d3f8d9194f30..a2fc8d490195 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -611,28 +611,28 @@ static const struct hid_blacklist {
 
 	{ USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SMARTJOY_DUAL_PLUS, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
 
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE | HID_QUIRK_APPLE_ISO_KEYBOARD},
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
 	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI, HID_QUIRK_APPLE_HAS_FN },
 	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD },
 	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS, HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI, HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS, HID_QUIRK_APPLE_HAS_FN },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
-	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY, HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_APPLE_ISO_KEYBOARD },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
+	{ USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY, HID_QUIRK_APPLE_NUMLOCK_EMULATION | HID_QUIRK_APPLE_HAS_FN | HID_QUIRK_IGNORE_MOUSE },
 
 	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_W7658, HID_QUIRK_RESET_LEDS },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KBD, HID_QUIRK_RESET_LEDS },
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 4ce3b7a979ba..6fc10d19d14d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -285,6 +285,7 @@ struct hid_item {
 #define HID_QUIRK_HWHEEL_WHEEL_INVERT		0x04000000
 #define HID_QUIRK_MICROSOFT_KEYS		0x08000000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
+#define HID_QUIRK_APPLE_NUMLOCK_EMULATION	0x20000000
 
 /*
  * Separate quirks for runtime report descriptor fixup
-- 
cgit v1.2.3


From f8dea7a3d47ee7c857965b22e33229e7de410a88 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 20 May 2008 01:31:25 +0200
Subject: HID: remove CVS keywords

This patch removes CVS keywords that weren't updated for a long time
from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c       | 2 --
 drivers/hid/hid-input.c       | 2 --
 drivers/hid/usbhid/usbkbd.c   | 2 --
 drivers/hid/usbhid/usbmouse.c | 2 --
 include/linux/hid.h           | 2 --
 include/linux/hiddev.h        | 2 --
 6 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index f88714b06000..47ac1a7d66e1 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1,6 +1,4 @@
 /*
- * $Id: hid-debug.h,v 1.8 2001/09/25 09:37:57 vojtech Exp $
- *
  *  (c) 1999 Andreas Gal		<gal@cs.uni-magdeburg.de>
  *  (c) 2000-2001 Vojtech Pavlik	<vojtech@ucw.cz>
  *  (c) 2007 Jiri Kosina
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 452b94dd7401..5c52a20ad344 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1,6 +1,4 @@
 /*
- * $Id: hid-input.c,v 1.2 2002/04/23 00:59:25 rdamazio Exp $
- *
  *  Copyright (c) 2000-2001 Vojtech Pavlik
  *  Copyright (c) 2006-2007 Jiri Kosina
  *
diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index 5d9dbb47e4a8..3cd46d2e53c1 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -1,6 +1,4 @@
 /*
- * $Id: usbkbd.c,v 1.27 2001/12/27 10:37:41 vojtech Exp $
- *
  *  Copyright (c) 1999-2001 Vojtech Pavlik
  *
  *  USB HIDBP Keyboard support
diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index df0d96d989de..703e9d0e8714 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -1,6 +1,4 @@
 /*
- * $Id: usbmouse.c,v 1.15 2001/12/27 10:37:41 vojtech Exp $
- *
  *  Copyright (c) 1999-2001 Vojtech Pavlik
  *
  *  USB HIDBP Mouse support
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 6fc10d19d14d..fe56b86f2c67 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -2,8 +2,6 @@
 #define __HID_H
 
 /*
- * $Id: hid.h,v 1.24 2001/12/27 10:37:41 vojtech Exp $
- *
  *  Copyright (c) 1999 Andreas Gal
  *  Copyright (c) 2000-2001 Vojtech Pavlik
  *  Copyright (c) 2006-2007 Jiri Kosina
diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h
index acbdae6d7ae1..a416b904ba90 100644
--- a/include/linux/hiddev.h
+++ b/include/linux/hiddev.h
@@ -2,8 +2,6 @@
 #define _HIDDEV_H
 
 /*
- * $Id: hiddev.h,v 1.2 2001/04/26 11:26:09 vojtech Exp $
- *
  *  Copyright (c) 1999-2000 Vojtech Pavlik
  *
  *  Sponsored by SuSE
-- 
cgit v1.2.3


From 8882b39421bae317e3ee864edd845e994307ce16 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 15 May 2008 13:44:08 -0700
Subject: Driver core: add device_create_vargs and device_create_drvdata

We want to have the drvdata field set properly when creating the device
as sysfs callbacks can assume it is present and it can race the later
setting of this field.

So, create two new functions, deviec_create_vargs() and
device_create_drvdata() that take this new field.

device_create_drvdata() will go away in 2.6.27 as the drvdata field will
just be moved to the device_create() call as it should be.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 85 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/device.h | 12 +++++++
 2 files changed, 91 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index be288b5e4180..f861c2b1dcff 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1084,11 +1084,13 @@ static void device_create_release(struct device *dev)
 }
 
 /**
- * device_create - creates a device and registers it with sysfs
+ * device_create_vargs - creates a device and registers it with sysfs
  * @class: pointer to the struct class that this device should be registered to
  * @parent: pointer to the parent struct device of this new device, if any
  * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
  * @fmt: string for the device's name
+ * @args: va_list for the device's name
  *
  * This function can be used by char device classes.  A struct device
  * will be created in sysfs, registered to the specified class.
@@ -1104,10 +1106,10 @@ static void device_create_release(struct device *dev)
  * Note: the struct class passed to this function must have previously
  * been created with a call to class_create().
  */
-struct device *device_create(struct class *class, struct device *parent,
-			     dev_t devt, const char *fmt, ...)
+struct device *device_create_vargs(struct class *class, struct device *parent,
+				   dev_t devt, void *drvdata, const char *fmt,
+				   va_list args)
 {
-	va_list args;
 	struct device *dev = NULL;
 	int retval = -ENODEV;
 
@@ -1124,10 +1126,9 @@ struct device *device_create(struct class *class, struct device *parent,
 	dev->class = class;
 	dev->parent = parent;
 	dev->release = device_create_release;
+	dev_set_drvdata(dev, drvdata);
 
-	va_start(args, fmt);
 	vsnprintf(dev->bus_id, BUS_ID_SIZE, fmt, args);
-	va_end(args);
 	retval = device_register(dev);
 	if (retval)
 		goto error;
@@ -1138,6 +1139,78 @@ error:
 	kfree(dev);
 	return ERR_PTR(retval);
 }
+EXPORT_SYMBOL_GPL(device_create_vargs);
+
+/**
+ * device_create_drvdata - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @drvdata: the data to be added to the device for callbacks
+ * @fmt: string for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create_drvdata(struct class *class,
+				     struct device *parent,
+				     dev_t devt,
+				     void *drvdata,
+				     const char *fmt, ...)
+{
+	va_list vargs;
+	struct device *dev;
+
+	va_start(vargs, fmt);
+	dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
+	va_end(vargs);
+	return dev;
+}
+EXPORT_SYMBOL_GPL(device_create_drvdata);
+
+/**
+ * device_create - creates a device and registers it with sysfs
+ * @class: pointer to the struct class that this device should be registered to
+ * @parent: pointer to the parent struct device of this new device, if any
+ * @devt: the dev_t for the char device to be added
+ * @fmt: string for the device's name
+ *
+ * This function can be used by char device classes.  A struct device
+ * will be created in sysfs, registered to the specified class.
+ *
+ * A "dev" file will be created, showing the dev_t for the device, if
+ * the dev_t is not 0,0.
+ * If a pointer to a parent struct device is passed in, the newly created
+ * struct device will be a child of that device in sysfs.
+ * The pointer to the struct device will be returned from the call.
+ * Any further sysfs files that might be required can be created using this
+ * pointer.
+ *
+ * Note: the struct class passed to this function must have previously
+ * been created with a call to class_create().
+ */
+struct device *device_create(struct class *class, struct device *parent,
+			     dev_t devt, const char *fmt, ...)
+{
+	va_list vargs;
+	struct device *dev;
+
+	va_start(vargs, fmt);
+	dev = device_create_vargs(class, parent, devt, NULL, fmt, vargs);
+	va_end(vargs);
+	return dev;
+}
 EXPORT_SYMBOL_GPL(device_create);
 
 static int __match_devt(struct device *dev, void *data)
diff --git a/include/linux/device.h b/include/linux/device.h
index 15e9fa3ad3af..14616e80213c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -449,9 +449,21 @@ extern int __must_check device_reprobe(struct device *dev);
 /*
  * Easy functions for dynamically creating devices on the fly
  */
+extern struct device *device_create_vargs(struct class *cls,
+					  struct device *parent,
+					  dev_t devt,
+					  void *drvdata,
+					  const char *fmt,
+					  va_list vargs);
 extern struct device *device_create(struct class *cls, struct device *parent,
 				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf, 4, 5)));
+extern struct device *device_create_drvdata(struct class *cls,
+					    struct device *parent,
+					    dev_t devt,
+					    void *drvdata,
+					    const char *fmt, ...)
+				    __attribute__((format(printf, 5, 6)));
 extern void device_destroy(struct class *cls, dev_t devt);
 
 /*
-- 
cgit v1.2.3


From afba937e540c902c989cd516fd97ea0c8499bb27 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 13 May 2008 17:01:25 +0200
Subject: USB: CDC WDM driver

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/class/Kconfig   |  11 +
 drivers/usb/class/Makefile  |   1 +
 drivers/usb/class/cdc-wdm.c | 740 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/cdc.h     |   9 +
 4 files changed, 761 insertions(+)
 create mode 100644 drivers/usb/class/cdc-wdm.c

(limited to 'include/linux')

diff --git a/drivers/usb/class/Kconfig b/drivers/usb/class/Kconfig
index 3a9102d2591b..66f17ed88cb5 100644
--- a/drivers/usb/class/Kconfig
+++ b/drivers/usb/class/Kconfig
@@ -29,3 +29,14 @@ config USB_PRINTER
 	  To compile this driver as a module, choose M here: the
 	  module will be called usblp.
 
+config USB_WDM
+	tristate "USB Wireless Device Management support"
+	depends on USB
+	---help---
+	  This driver supports the WMC Device Management functionality
+	  of cell phones compliant to the CDC WMC specification. You can use
+	  AT commands over this device.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cdc-wdm.
+
diff --git a/drivers/usb/class/Makefile b/drivers/usb/class/Makefile
index cc391e6c2af8..535d59a30600 100644
--- a/drivers/usb/class/Makefile
+++ b/drivers/usb/class/Makefile
@@ -5,3 +5,4 @@
 
 obj-$(CONFIG_USB_ACM)		+= cdc-acm.o
 obj-$(CONFIG_USB_PRINTER)	+= usblp.o
+obj-$(CONFIG_USB_WDM)		+= cdc-wdm.o
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
new file mode 100644
index 000000000000..107666d4e2ec
--- /dev/null
+++ b/drivers/usb/class/cdc-wdm.c
@@ -0,0 +1,740 @@
+/*
+ * cdc-wdm.c
+ *
+ * This driver supports USB CDC WCM Device Management.
+ *
+ * Copyright (c) 2007-2008 Oliver Neukum
+ *
+ * Some code taken from cdc-acm.c
+ *
+ * Released under the GPLv2.
+ *
+ * Many thanks to Carl Nordbeck
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/mutex.h>
+#include <linux/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/poll.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v0.02"
+#define DRIVER_AUTHOR "Oliver Neukum"
+
+static struct usb_device_id wdm_ids[] = {
+	{
+		.match_flags = USB_DEVICE_ID_MATCH_INT_CLASS |
+				 USB_DEVICE_ID_MATCH_INT_SUBCLASS,
+		.bInterfaceClass = USB_CLASS_COMM,
+		.bInterfaceSubClass = USB_CDC_SUBCLASS_DMM
+	},
+	{ }
+};
+
+#define WDM_MINOR_BASE	176
+
+
+#define WDM_IN_USE		1
+#define WDM_DISCONNECTING	2
+#define WDM_RESULT		3
+#define WDM_READ		4
+#define WDM_INT_STALL		5
+#define WDM_POLL_RUNNING	6
+
+
+#define WDM_MAX			16
+
+
+static DEFINE_MUTEX(wdm_mutex);
+
+/* --- method tables --- */
+
+struct wdm_device {
+	u8			*inbuf; /* buffer for response */
+	u8			*outbuf; /* buffer for command */
+	u8			*sbuf; /* buffer for status */
+	u8			*ubuf; /* buffer for copy to user space */
+
+	struct urb		*command;
+	struct urb		*response;
+	struct urb		*validity;
+	struct usb_interface	*intf;
+	struct usb_ctrlrequest	*orq;
+	struct usb_ctrlrequest	*irq;
+	spinlock_t		iuspin;
+
+	unsigned long		flags;
+	u16			bufsize;
+	u16			wMaxCommand;
+	u16			wMaxPacketSize;
+	u16			bMaxPacketSize0;
+	__le16			inum;
+	int			reslength;
+	int			length;
+	int			read;
+	int			count;
+	dma_addr_t		shandle;
+	dma_addr_t		ihandle;
+	struct mutex		wlock;
+	struct mutex		rlock;
+	wait_queue_head_t	wait;
+	struct work_struct	rxwork;
+	int			werr;
+	int			rerr;
+};
+
+static struct usb_driver wdm_driver;
+
+/* --- callbacks --- */
+static void wdm_out_callback(struct urb *urb)
+{
+	struct wdm_device *desc;
+	desc = urb->context;
+	spin_lock(&desc->iuspin);
+	desc->werr = urb->status;
+	spin_unlock(&desc->iuspin);
+	clear_bit(WDM_IN_USE, &desc->flags);
+	kfree(desc->outbuf);
+	wake_up(&desc->wait);
+}
+
+static void wdm_in_callback(struct urb *urb)
+{
+	struct wdm_device *desc = urb->context;
+	int status = urb->status;
+
+	spin_lock(&desc->iuspin);
+
+	if (status) {
+		switch (status) {
+		case -ENOENT:
+			dev_dbg(&desc->intf->dev,
+				"nonzero urb status received: -ENOENT");
+			break;
+		case -ECONNRESET:
+			dev_dbg(&desc->intf->dev,
+				"nonzero urb status received: -ECONNRESET");
+			break;
+		case -ESHUTDOWN:
+			dev_dbg(&desc->intf->dev,
+				"nonzero urb status received: -ESHUTDOWN");
+			break;
+		case -EPIPE:
+			err("nonzero urb status received: -EPIPE");
+			break;
+		default:
+			err("Unexpected error %d", status);
+			break;
+		}
+	}
+
+	desc->rerr = status;
+	desc->reslength = urb->actual_length;
+	memmove(desc->ubuf + desc->length, desc->inbuf, desc->reslength);
+	desc->length += desc->reslength;
+	wake_up(&desc->wait);
+
+	set_bit(WDM_READ, &desc->flags);
+	spin_unlock(&desc->iuspin);
+}
+
+static void wdm_int_callback(struct urb *urb)
+{
+	int rv = 0;
+	int status = urb->status;
+	struct wdm_device *desc;
+	struct usb_ctrlrequest *req;
+	struct usb_cdc_notification *dr;
+
+	desc = urb->context;
+	req = desc->irq;
+	dr = (struct usb_cdc_notification *)desc->sbuf;
+
+	if (status) {
+		switch (status) {
+		case -ESHUTDOWN:
+		case -ENOENT:
+		case -ECONNRESET:
+			return; /* unplug */
+		case -EPIPE:
+			set_bit(WDM_INT_STALL, &desc->flags);
+			err("Stall on int endpoint");
+			goto sw; /* halt is cleared in work */
+		default:
+			err("nonzero urb status received: %d", status);
+			break;
+		}
+	}
+
+	if (urb->actual_length < sizeof(struct usb_cdc_notification)) {
+		err("wdm_int_callback - %d bytes", urb->actual_length);
+		goto exit;
+	}
+
+	switch (dr->bNotificationType) {
+	case USB_CDC_NOTIFY_RESPONSE_AVAILABLE:
+		dev_dbg(&desc->intf->dev,
+			"NOTIFY_RESPONSE_AVAILABLE received: index %d len %d",
+			dr->wIndex, dr->wLength);
+		break;
+
+	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+
+		dev_dbg(&desc->intf->dev,
+			"NOTIFY_NETWORK_CONNECTION %s network",
+			dr->wValue ? "connected to" : "disconnected from");
+		goto exit;
+	default:
+		clear_bit(WDM_POLL_RUNNING, &desc->flags);
+		err("unknown notification %d received: index %d len %d",
+			dr->bNotificationType, dr->wIndex, dr->wLength);
+		goto exit;
+	}
+
+	req->bRequestType = (USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE);
+	req->bRequest = USB_CDC_GET_ENCAPSULATED_RESPONSE;
+	req->wValue = 0;
+	req->wIndex = desc->inum;
+	req->wLength = cpu_to_le16(desc->bMaxPacketSize0);
+
+	usb_fill_control_urb(
+		desc->response,
+		interface_to_usbdev(desc->intf),
+		/* using common endpoint 0 */
+		usb_rcvctrlpipe(interface_to_usbdev(desc->intf), 0),
+		(unsigned char *)req,
+		desc->inbuf,
+		desc->bMaxPacketSize0,
+		wdm_in_callback,
+		desc
+	);
+	desc->response->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	spin_lock(&desc->iuspin);
+	clear_bit(WDM_READ, &desc->flags);
+	if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
+		rv = usb_submit_urb(desc->response, GFP_ATOMIC);
+		dev_dbg(&desc->intf->dev, "%s: usb_submit_urb %d",
+			__func__, rv);
+	}
+	spin_unlock(&desc->iuspin);
+	if (rv < 0) {
+		if (rv == -EPERM)
+			return;
+		if (rv == -ENOMEM) {
+sw:
+			rv = schedule_work(&desc->rxwork);
+			if (rv)
+				err("Cannot schedule work");
+		}
+	}
+exit:
+	rv = usb_submit_urb(urb, GFP_ATOMIC);
+	if (rv)
+		err("%s - usb_submit_urb failed with result %d",
+		     __func__, rv);
+
+}
+
+static void kill_urbs(struct wdm_device *desc)
+{
+	usb_kill_urb(desc->command);
+	usb_kill_urb(desc->validity);
+	usb_kill_urb(desc->response);
+}
+
+static void free_urbs(struct wdm_device *desc)
+{
+	usb_free_urb(desc->validity);
+	usb_free_urb(desc->response);
+	usb_free_urb(desc->command);
+}
+
+static void cleanup(struct wdm_device *desc)
+{
+	usb_buffer_free(interface_to_usbdev(desc->intf),
+			desc->wMaxPacketSize,
+			desc->sbuf,
+			desc->validity->transfer_dma);
+	usb_buffer_free(interface_to_usbdev(desc->intf),
+			desc->wMaxPacketSize,
+			desc->inbuf,
+			desc->response->transfer_dma);
+	kfree(desc->orq);
+	kfree(desc->irq);
+	kfree(desc->ubuf);
+	free_urbs(desc);
+	kfree(desc);
+}
+
+static ssize_t wdm_write
+(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
+{
+	u8 *buf;
+	int rv = -EMSGSIZE, r, we;
+	struct wdm_device *desc = file->private_data;
+	struct usb_ctrlrequest *req;
+
+	if (count > desc->wMaxCommand)
+		count = desc->wMaxCommand;
+
+	spin_lock_irq(&desc->iuspin);
+	we = desc->werr;
+	desc->werr = 0;
+	spin_unlock_irq(&desc->iuspin);
+	if (we < 0)
+		return -EIO;
+
+	r = mutex_lock_interruptible(&desc->wlock); /* concurrent writes */
+	rv = -ERESTARTSYS;
+	if (r)
+		goto outnl;
+
+	r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE,
+							   &desc->flags));
+	if (r < 0)
+		goto out;
+
+	if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
+		rv = -ENODEV;
+		goto out;
+	}
+
+	desc->outbuf = buf = kmalloc(count, GFP_KERNEL);
+	if (!buf) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	r = copy_from_user(buf, buffer, count);
+	if (r > 0) {
+		kfree(buf);
+		rv = -EFAULT;
+		goto out;
+	}
+
+	req = desc->orq;
+	usb_fill_control_urb(
+		desc->command,
+		interface_to_usbdev(desc->intf),
+		/* using common endpoint 0 */
+		usb_sndctrlpipe(interface_to_usbdev(desc->intf), 0),
+		(unsigned char *)req,
+		buf,
+		count,
+		wdm_out_callback,
+		desc
+	);
+
+	req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS |
+			     USB_RECIP_INTERFACE);
+	req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND;
+	req->wValue = 0;
+	req->wIndex = desc->inum;
+	req->wLength = cpu_to_le16(count);
+	set_bit(WDM_IN_USE, &desc->flags);
+
+	rv = usb_submit_urb(desc->command, GFP_KERNEL);
+	if (rv < 0) {
+		kfree(buf);
+		clear_bit(WDM_IN_USE, &desc->flags);
+	} else {
+		dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d",
+			req->wIndex);
+	}
+out:
+	mutex_unlock(&desc->wlock);
+outnl:
+	return rv < 0 ? rv : count;
+}
+
+static ssize_t wdm_read
+(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
+{
+	int rv, cntr;
+	int i = 0;
+	struct wdm_device *desc = file->private_data;
+
+
+	rv = mutex_lock_interruptible(&desc->rlock); /*concurrent reads */
+	if (rv < 0)
+		return -ERESTARTSYS;
+
+	if (desc->length == 0) {
+		desc->read = 0;
+retry:
+		i++;
+		rv = wait_event_interruptible(desc->wait,
+					      test_bit(WDM_READ, &desc->flags));
+
+		if (rv < 0) {
+			rv = -ERESTARTSYS;
+			goto err;
+		}
+
+		spin_lock_irq(&desc->iuspin);
+
+		if (desc->rerr) { /* read completed, error happened */
+			int t = desc->rerr;
+			desc->rerr = 0;
+			spin_unlock_irq(&desc->iuspin);
+			err("reading had resulted in %d", t);
+			rv = -EIO;
+			goto err;
+		}
+		/*
+		 * recheck whether we've lost the race
+		 * against the completion handler
+		 */
+		if (!test_bit(WDM_READ, &desc->flags)) { /* lost race */
+			spin_unlock_irq(&desc->iuspin);
+			goto retry;
+		}
+		if (!desc->reslength) { /* zero length read */
+			spin_unlock_irq(&desc->iuspin);
+			goto retry;
+		}
+		clear_bit(WDM_READ, &desc->flags);
+		spin_unlock_irq(&desc->iuspin);
+	}
+
+	cntr = count > desc->length ? desc->length : count;
+	rv = copy_to_user(buffer, desc->ubuf, cntr);
+	if (rv > 0) {
+		rv = -EFAULT;
+		goto err;
+	}
+
+	for (i = 0; i < desc->length - cntr; i++)
+		desc->ubuf[i] = desc->ubuf[i + cntr];
+
+	desc->length -= cntr;
+	rv = cntr;
+
+err:
+	mutex_unlock(&desc->rlock);
+	if (rv < 0)
+		err("wdm_read: exit error");
+	return rv;
+}
+
+static int wdm_flush(struct file *file, fl_owner_t id)
+{
+	struct wdm_device *desc = file->private_data;
+
+	wait_event(desc->wait, !test_bit(WDM_IN_USE, &desc->flags));
+	if (desc->werr < 0)
+		err("Error in flush path: %d", desc->werr);
+
+	return desc->werr;
+}
+
+static unsigned int wdm_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct wdm_device *desc = file->private_data;
+	unsigned long flags;
+	unsigned int mask = 0;
+
+	spin_lock_irqsave(&desc->iuspin, flags);
+	if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
+		mask = POLLERR;
+		spin_unlock_irqrestore(&desc->iuspin, flags);
+		goto desc_out;
+	}
+	if (test_bit(WDM_READ, &desc->flags))
+		mask = POLLIN | POLLRDNORM;
+	if (desc->rerr || desc->werr)
+		mask |= POLLERR;
+	if (!test_bit(WDM_IN_USE, &desc->flags))
+		mask |= POLLOUT | POLLWRNORM;
+	spin_unlock_irqrestore(&desc->iuspin, flags);
+
+	poll_wait(file, &desc->wait, wait);
+
+desc_out:
+	return mask;
+}
+
+static int wdm_open(struct inode *inode, struct file *file)
+{
+	int minor = iminor(inode);
+	int rv = -ENODEV;
+	struct usb_interface *intf;
+	struct wdm_device *desc;
+
+	mutex_lock(&wdm_mutex);
+	intf = usb_find_interface(&wdm_driver, minor);
+	if (!intf)
+		goto out;
+
+	desc = usb_get_intfdata(intf);
+	if (test_bit(WDM_DISCONNECTING, &desc->flags))
+		goto out;
+
+	desc->count++;
+	file->private_data = desc;
+
+	rv = usb_submit_urb(desc->validity, GFP_KERNEL);
+
+	if (rv < 0) {
+		desc->count--;
+		err("Error submitting int urb - %d", rv);
+		goto out;
+	}
+	rv = 0;
+
+out:
+	mutex_unlock(&wdm_mutex);
+	return rv;
+}
+
+static int wdm_release(struct inode *inode, struct file *file)
+{
+	struct wdm_device *desc = file->private_data;
+
+	mutex_lock(&wdm_mutex);
+	desc->count--;
+	if (!desc->count) {
+		dev_dbg(&desc->intf->dev, "wdm_release: cleanup");
+		kill_urbs(desc);
+	}
+	mutex_unlock(&wdm_mutex);
+	return 0;
+}
+
+static const struct file_operations wdm_fops = {
+	.owner =	THIS_MODULE,
+	.read =		wdm_read,
+	.write =	wdm_write,
+	.open =		wdm_open,
+	.flush =	wdm_flush,
+	.release =	wdm_release,
+	.poll =		wdm_poll
+};
+
+static struct usb_class_driver wdm_class = {
+	.name =		"cdc-wdm%d",
+	.fops =		&wdm_fops,
+	.minor_base =	WDM_MINOR_BASE,
+};
+
+/* --- error handling --- */
+static void wdm_rxwork(struct work_struct *work)
+{
+	struct wdm_device *desc = container_of(work, struct wdm_device, rxwork);
+	unsigned long flags;
+	int rv;
+
+	spin_lock_irqsave(&desc->iuspin, flags);
+	if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
+		spin_unlock_irqrestore(&desc->iuspin, flags);
+	} else {
+		spin_unlock_irqrestore(&desc->iuspin, flags);
+		rv = usb_submit_urb(desc->response, GFP_KERNEL);
+		if (rv < 0 && rv != -EPERM) {
+			spin_lock_irqsave(&desc->iuspin, flags);
+			if (!test_bit(WDM_DISCONNECTING, &desc->flags))
+				schedule_work(&desc->rxwork);
+			spin_unlock_irqrestore(&desc->iuspin, flags);
+		}
+	}
+}
+
+/* --- hotplug --- */
+
+static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id)
+{
+	int rv = -EINVAL;
+	struct usb_device *udev = interface_to_usbdev(intf);
+	struct wdm_device *desc;
+	struct usb_host_interface *iface;
+	struct usb_endpoint_descriptor *ep;
+	struct usb_cdc_dmm_desc *dmhd;
+	u8 *buffer = intf->altsetting->extra;
+	int buflen = intf->altsetting->extralen;
+	u16 maxcom = 0;
+
+	if (!buffer)
+		goto out;
+
+	while (buflen > 0) {
+		if (buffer [1] != USB_DT_CS_INTERFACE) {
+			err("skipping garbage");
+			goto next_desc;
+		}
+
+		switch (buffer [2]) {
+		case USB_CDC_HEADER_TYPE:
+			break;
+		case USB_CDC_DMM_TYPE:
+			dmhd = (struct usb_cdc_dmm_desc *)buffer;
+			maxcom = le16_to_cpu(dmhd->wMaxCommand);
+			dev_dbg(&intf->dev,
+				"Finding maximum buffer length: %d", maxcom);
+			break;
+		default:
+			err("Ignoring extra header, type %d, length %d",
+				buffer[2], buffer[0]);
+			break;
+		}
+next_desc:
+		buflen -= buffer[0];
+		buffer += buffer[0];
+	}
+
+	rv = -ENOMEM;
+	desc = kzalloc(sizeof(struct wdm_device), GFP_KERNEL);
+	if (!desc)
+		goto out;
+	mutex_init(&desc->wlock);
+	mutex_init(&desc->rlock);
+	spin_lock_init(&desc->iuspin);
+	init_waitqueue_head(&desc->wait);
+	desc->wMaxCommand = maxcom;
+	desc->inum = cpu_to_le16((u16)intf->cur_altsetting->desc.bInterfaceNumber);
+	desc->intf = intf;
+	INIT_WORK(&desc->rxwork, wdm_rxwork);
+
+	iface = &intf->altsetting[0];
+	ep = &iface->endpoint[0].desc;
+	if (!usb_endpoint_is_int_in(ep)) {
+		rv = -EINVAL;
+		goto err;
+	}
+
+	desc->wMaxPacketSize = ep->wMaxPacketSize;
+	desc->bMaxPacketSize0 = cpu_to_le16(udev->descriptor.bMaxPacketSize0);
+
+	desc->orq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL);
+	if (!desc->orq)
+		goto err;
+	desc->irq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL);
+	if (!desc->irq)
+		goto err;
+
+	desc->validity = usb_alloc_urb(0, GFP_KERNEL);
+	if (!desc->validity)
+		goto err;
+
+	desc->response = usb_alloc_urb(0, GFP_KERNEL);
+	if (!desc->response)
+		goto err;
+
+	desc->command = usb_alloc_urb(0, GFP_KERNEL);
+	if (!desc->command)
+		goto err;
+
+	desc->ubuf = kmalloc(desc->wMaxCommand, GFP_KERNEL);
+	if (!desc->ubuf)
+		goto err;
+
+	desc->sbuf = usb_buffer_alloc(interface_to_usbdev(intf),
+					desc->wMaxPacketSize,
+					GFP_KERNEL,
+					&desc->validity->transfer_dma);
+	if (!desc->sbuf)
+		goto err;
+
+	desc->inbuf = usb_buffer_alloc(interface_to_usbdev(intf),
+					desc->bMaxPacketSize0,
+					GFP_KERNEL,
+					&desc->response->transfer_dma);
+	if (!desc->inbuf)
+		goto err2;
+
+	usb_fill_int_urb(
+		desc->validity,
+		interface_to_usbdev(intf),
+		usb_rcvintpipe(interface_to_usbdev(intf), ep->bEndpointAddress),
+		desc->sbuf,
+		desc->wMaxPacketSize,
+		wdm_int_callback,
+		desc,
+		ep->bInterval
+	);
+	desc->validity->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+	usb_set_intfdata(intf, desc);
+	rv = usb_register_dev(intf, &wdm_class);
+	dev_info(&intf->dev, "cdc-wdm%d: USB WDM device\n",
+		 intf->minor - WDM_MINOR_BASE);
+	if (rv < 0)
+		goto err;
+out:
+	return rv;
+err2:
+	usb_buffer_free(interface_to_usbdev(desc->intf),
+			desc->wMaxPacketSize,
+			desc->sbuf,
+			desc->validity->transfer_dma);
+err:
+	free_urbs(desc);
+	kfree(desc->ubuf);
+	kfree(desc->orq);
+	kfree(desc->irq);
+	kfree(desc);
+	return rv;
+}
+
+static void wdm_disconnect(struct usb_interface *intf)
+{
+	struct wdm_device *desc;
+	unsigned long flags;
+
+	usb_deregister_dev(intf, &wdm_class);
+	mutex_lock(&wdm_mutex);
+	desc = usb_get_intfdata(intf);
+
+	/* the spinlock makes sure no new urbs are generated in the callbacks */
+	spin_lock_irqsave(&desc->iuspin, flags);
+	set_bit(WDM_DISCONNECTING, &desc->flags);
+	set_bit(WDM_READ, &desc->flags);
+	clear_bit(WDM_IN_USE, &desc->flags);
+	spin_unlock_irqrestore(&desc->iuspin, flags);
+	cancel_work_sync(&desc->rxwork);
+	kill_urbs(desc);
+	wake_up_all(&desc->wait);
+	if (!desc->count)
+		cleanup(desc);
+	mutex_unlock(&wdm_mutex);
+}
+
+static struct usb_driver wdm_driver = {
+	.name =		"cdc_wdm",
+	.probe =	wdm_probe,
+	.disconnect =	wdm_disconnect,
+	.id_table =	wdm_ids,
+};
+
+/* --- low level module stuff --- */
+
+static int __init wdm_init(void)
+{
+	int rv;
+
+	rv = usb_register(&wdm_driver);
+
+	return rv;
+}
+
+static void __exit wdm_exit(void)
+{
+	usb_deregister(&wdm_driver);
+}
+
+module_init(wdm_init);
+module_exit(wdm_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION("USB Abstract Control Model driver for "
+		   "USB WCM Device Management");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h
index 71e52f2f6a38..ca228bb94218 100644
--- a/include/linux/usb/cdc.h
+++ b/include/linux/usb/cdc.h
@@ -130,6 +130,15 @@ struct usb_cdc_ether_desc {
 	__u8	bNumberPowerFilters;
 } __attribute__ ((packed));
 
+/* "Telephone Control Model Functional Descriptor" from CDC WMC spec 6.3..3 */
+struct usb_cdc_dmm_desc {
+	__u8	bFunctionLength;
+	__u8	bDescriptorType;
+	__u8	bDescriptorSubtype;
+	__u16	bcdVersion;
+	__le16	wMaxCommand;
+} __attribute__ ((packed));
+
 /* "MDLM Functional Descriptor" from CDC WMC spec 6.7.2.3 */
 struct usb_cdc_mdlm_desc {
 	__u8	bLength;
-- 
cgit v1.2.3


From 5fb13570543f4ae022996c9d7c0c099c8abf22dd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 20 May 2008 14:54:50 -0700
Subject: [VLAN]: Propagate selected feature bits to VLAN devices

Propagate feature bits from the NETDEV_FEAT_CHANGE notifier. For now
only TSO is propagated for devices that announce their ability to
support TSO in combination with VLAN accel by setting the NETIF_F_VLAN_TSO
flag.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 ++++--
 net/8021q/vlan.c          | 30 ++++++++++++++++++++++++++++++
 net/8021q/vlan.h          |  2 ++
 net/8021q/vlan_dev.c      |  5 +++++
 4 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b11e6e19e96c..2b0266484c84 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -514,10 +514,12 @@ struct net_device
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
 #define NETIF_F_MULTI_QUEUE	16384	/* Has multiple TX/RX queues */
 #define NETIF_F_LRO		32768	/* large receive offload */
+#define NETIF_F_VLAN_TSO	65536	/* Supports TSO for VLANs */
+#define NETIF_F_VLAN_CSUM	131072	/* Supports TX checksumming for VLANs */
 
 	/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0xffff0000
+#define NETIF_F_GSO_SHIFT	20
+#define NETIF_F_GSO_MASK	0xfff00000
 #define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
 #define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
 #define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index b934159a6f07..51961300b586 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -382,6 +382,24 @@ static void vlan_sync_address(struct net_device *dev,
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
 
+static void vlan_transfer_features(struct net_device *dev,
+				   struct net_device *vlandev)
+{
+	unsigned long old_features = vlandev->features;
+
+	if (dev->features & NETIF_F_VLAN_TSO) {
+		vlandev->features &= ~VLAN_TSO_FEATURES;
+		vlandev->features |= dev->features & VLAN_TSO_FEATURES;
+	}
+	if (dev->features & NETIF_F_VLAN_CSUM) {
+		vlandev->features &= ~NETIF_F_ALL_CSUM;
+		vlandev->features |= dev->features & NETIF_F_ALL_CSUM;
+	}
+
+	if (old_features != vlandev->features)
+		netdev_features_change(vlandev);
+}
+
 static void __vlan_device_event(struct net_device *dev, unsigned long event)
 {
 	switch (event) {
@@ -448,6 +466,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		break;
 
+	case NETDEV_FEAT_CHANGE:
+		/* Propagate device features to underlying device */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = vlan_group_get_device(grp, i);
+			if (!vlandev)
+				continue;
+
+			vlan_transfer_features(dev, vlandev);
+		}
+
+		break;
+
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5229a72c7ea1..79625696e86a 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -7,6 +7,8 @@
 #define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
 #define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
 
+#define VLAN_TSO_FEATURES	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG)
+
 /*  Find a VLAN device by the MAC address of its Ethernet device, and
  *  it's VLAN ID.  The default configuration is to have VLAN's scope
  *  to be box-wide, so the MAC will be ignored.  The mac will only be
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c961f0826005..b1cfbaa88db2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,6 +663,11 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
+	if (real_dev->features & NETIF_F_VLAN_TSO)
+		dev->features |= real_dev->features & VLAN_TSO_FEATURES;
+	if (real_dev->features & NETIF_F_VLAN_CSUM)
+		dev->features |= real_dev->features & NETIF_F_ALL_CSUM;
+
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
 
-- 
cgit v1.2.3


From c8942f1f0a7e2160ebf2e51ba89e50ee5895a1e7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 21 May 2008 14:08:38 -0700
Subject: netfilter: Move linux/types.h inclusions outside of #ifdef __KERNEL__

Greg Steuck <greg@nest.cx> points out that some of the netfilter
headers can't be used in userspace without including linux/types.h
first. The headers include their own linux/types.h include statements,
these are stripped by make headers-install because they are inside
#ifdef __KERNEL__ however. Move them out to fix this.

Reported and Tested by Greg Steuck.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h                 | 2 +-
 include/linux/netfilter_arp/arp_tables.h  | 2 +-
 include/linux/netfilter_ipv4/ip_tables.h  | 2 +-
 include/linux/netfilter_ipv6/ip6_tables.h | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index e4c66593b5c6..0c5eb7ed8b3f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -3,7 +3,6 @@
 
 #ifdef __KERNEL__
 #include <linux/init.h>
-#include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
@@ -14,6 +13,7 @@
 #include <linux/list.h>
 #include <net/net_namespace.h>
 #endif
+#include <linux/types.h>
 #include <linux/compiler.h>
 
 /* Responses from hook functions. */
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index dd9c97f2d436..590ac3d6d5d6 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -11,11 +11,11 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
-#include <linux/types.h>
 #include <linux/in.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #endif
+#include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/netfilter_arp.h>
 
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index bfc889f90276..092bd50581a9 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -17,11 +17,11 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
-#include <linux/types.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #endif
+#include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/netfilter_ipv4.h>
 
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index f2507dcc5750..1089e33cf633 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -17,11 +17,11 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
-#include <linux/types.h>
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/skbuff.h>
 #endif
+#include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/netfilter_ipv6.h>
 
-- 
cgit v1.2.3


From 4b749440445ebcb6fad402fc762bc35af871f689 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 May 2008 17:40:05 -0700
Subject: tcp: Make prior_ssthresh a u32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If previous window was above representable values of u16,
strange things will happen if undo with the truncated value
is called for. Alternatively, this could be fixed by some
max trickery but that would limit undoing high-speed undos.

Adds 16-bit hole but there isn't anything to fill it with.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d96d9b122304..18e62e3d406f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -355,7 +355,7 @@ struct tcp_sock {
 	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */
 
 	u16	advmss;		/* Advertised MSS			*/
-	u16	prior_ssthresh; /* ssthresh saved at recovery start	*/
+	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
 	u32	lost_out;	/* Lost packets			*/
 	u32	sacked_out;	/* SACK'd packets			*/
 	u32	fackets_out;	/* FACK'd packets			*/
-- 
cgit v1.2.3


From 289c79a4bd350e8a25065102563ad1a183d1b402 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 May 2008 00:22:04 -0700
Subject: vlan: Use bitmask of feature flags instead of seperate feature bits

Herbert Xu points out that the use of seperate feature bits for features
to be propagated to VLAN devices is going to get messy real soon.
Replace the VLAN feature bits by a bitmask of feature flags to be
propagated and restore the old GSO_SHIFT/MASK values.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 +++++----
 net/8021q/vlan.c          | 10 ++--------
 net/8021q/vlan.h          |  2 --
 net/8021q/vlan_dev.c      |  5 +----
 4 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b0266484c84..f27fd2009334 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -514,12 +514,10 @@ struct net_device
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
 #define NETIF_F_MULTI_QUEUE	16384	/* Has multiple TX/RX queues */
 #define NETIF_F_LRO		32768	/* large receive offload */
-#define NETIF_F_VLAN_TSO	65536	/* Supports TSO for VLANs */
-#define NETIF_F_VLAN_CSUM	131072	/* Supports TX checksumming for VLANs */
 
 	/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	20
-#define NETIF_F_GSO_MASK	0xfff00000
+#define NETIF_F_GSO_SHIFT	16
+#define NETIF_F_GSO_MASK	0xffff0000
 #define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
 #define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
 #define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
@@ -747,6 +745,9 @@ struct net_device
 	/* rtnetlink link ops */
 	const struct rtnl_link_ops *rtnl_link_ops;
 
+	/* VLAN feature mask */
+	unsigned long vlan_features;
+
 	/* for setting kernel sock attribute on TCP connection setup */
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 51961300b586..ab2225da0ee2 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -387,14 +387,8 @@ static void vlan_transfer_features(struct net_device *dev,
 {
 	unsigned long old_features = vlandev->features;
 
-	if (dev->features & NETIF_F_VLAN_TSO) {
-		vlandev->features &= ~VLAN_TSO_FEATURES;
-		vlandev->features |= dev->features & VLAN_TSO_FEATURES;
-	}
-	if (dev->features & NETIF_F_VLAN_CSUM) {
-		vlandev->features &= ~NETIF_F_ALL_CSUM;
-		vlandev->features |= dev->features & NETIF_F_ALL_CSUM;
-	}
+	vlandev->features &= ~dev->vlan_features;
+	vlandev->features |= dev->features & dev->vlan_features;
 
 	if (old_features != vlandev->features)
 		netdev_features_change(vlandev);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 79625696e86a..5229a72c7ea1 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -7,8 +7,6 @@
 #define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
 #define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
 
-#define VLAN_TSO_FEATURES	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG)
-
 /*  Find a VLAN device by the MAC address of its Ethernet device, and
  *  it's VLAN ID.  The default configuration is to have VLAN's scope
  *  to be box-wide, so the MAC will be ignored.  The mac will only be
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b1cfbaa88db2..5d055c242ed8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,10 +663,7 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
-	if (real_dev->features & NETIF_F_VLAN_TSO)
-		dev->features |= real_dev->features & VLAN_TSO_FEATURES;
-	if (real_dev->features & NETIF_F_VLAN_CSUM)
-		dev->features |= real_dev->features & NETIF_F_ALL_CSUM;
+	dev->features |= real_dev->features & real_dev->vlan_features;
 
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
-- 
cgit v1.2.3


From b8fdaf5a05adbf80e5a943bb3f65b46b5fb9b488 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Fri, 23 May 2008 13:04:25 -0700
Subject: i5k_amb: support Intel 5400 chipset

Minor rework to support the Intel 5400 chipset.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Cc: "Mark M. Hoffman" <mhoffman@lightlink.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/i5k_amb.c | 39 ++++++++++++++++++++++++++++++++++-----
 include/linux/pci_ids.h |  3 +++
 2 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c
index 6ac5c6f53585..f9e2ed621f7b 100644
--- a/drivers/hwmon/i5k_amb.c
+++ b/drivers/hwmon/i5k_amb.c
@@ -111,6 +111,7 @@ struct i5k_amb_data {
 	void __iomem *amb_mmio;
 	struct i5k_device_attribute *attrs;
 	unsigned int num_attrs;
+	unsigned long chipset_id;
 };
 
 static ssize_t show_name(struct device *dev, struct device_attribute *devattr,
@@ -382,7 +383,8 @@ err:
 	return res;
 }
 
-static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data)
+static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data,
+					    unsigned long devid)
 {
 	struct pci_dev *pcidev;
 	u32 val32;
@@ -390,7 +392,7 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data)
 
 	/* Find AMB register memory space */
 	pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-				PCI_DEVICE_ID_INTEL_5000_ERR,
+				devid,
 				NULL);
 	if (!pcidev)
 		return -ENODEV;
@@ -409,6 +411,8 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data)
 		goto out;
 	}
 
+	data->chipset_id = devid;
+
 	res = 0;
 out:
 	pci_dev_put(pcidev);
@@ -441,10 +445,30 @@ out:
 	return res;
 }
 
+static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data,
+					unsigned long channel)
+{
+	switch (data->chipset_id) {
+	case PCI_DEVICE_ID_INTEL_5000_ERR:
+		return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel;
+	case PCI_DEVICE_ID_INTEL_5400_ERR:
+		return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel;
+	default:
+		BUG();
+	}
+}
+
+static unsigned long chipset_ids[] = {
+	PCI_DEVICE_ID_INTEL_5000_ERR,
+	PCI_DEVICE_ID_INTEL_5400_ERR,
+	0
+};
+
 static int __devinit i5k_amb_probe(struct platform_device *pdev)
 {
 	struct i5k_amb_data *data;
 	struct resource *reso;
+	int i;
 	int res = -ENODEV;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -452,19 +476,24 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	/* Figure out where the AMB registers live */
-	res = i5k_find_amb_registers(data);
+	i = 0;
+	do {
+		res = i5k_find_amb_registers(data, chipset_ids[i]);
+		i++;
+	} while (res && chipset_ids[i]);
+
 	if (res)
 		goto err;
 
 	/* Copy the DIMM presence map for the first two channels */
 	res = i5k_channel_probe(&data->amb_present[0],
-				PCI_DEVICE_ID_INTEL_5000_FBD0);
+				i5k_channel_pci_id(data, 0));
 	if (res)
 		goto err;
 
 	/* Copy the DIMM presence map for the optional second two channels */
 	i5k_channel_probe(&data->amb_present[2],
-			  PCI_DEVICE_ID_INTEL_5000_FBD1);
+			  i5k_channel_pci_id(data, 1));
 
 	/* Set up resource regions */
 	reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cf6dbd759395..342b55afb0c3 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2383,6 +2383,9 @@
 #define PCI_DEVICE_ID_INTEL_ICH10_4	0x3a30
 #define PCI_DEVICE_ID_INTEL_ICH10_5	0x3a60
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
+#define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
+#define PCI_DEVICE_ID_INTEL_5400_FBD0	0x4035
+#define PCI_DEVICE_ID_INTEL_5400_FBD1	0x4036
 #define PCI_DEVICE_ID_INTEL_IOAT_SCNB	0x65ff
 #define PCI_DEVICE_ID_INTEL_TOLAPAI_0	0x5031
 #define PCI_DEVICE_ID_INTEL_TOLAPAI_1	0x5032
-- 
cgit v1.2.3


From 4b6f6ce97ecc20eb8f3ece3c8370faacfe73e8c2 Mon Sep 17 00:00:00 2001
From: Ignacio García Pérez <iggarpe@t2i.com>
Date: Fri, 23 May 2008 13:04:28 -0700
Subject: serial: support for InstaShield IS-400 four port RS-232 PCI card
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the InstaShield IS-400 four port RS-232 PCI card.

Signed-off-by: Ignacio García Pérez <iggarpe@t2i.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 7 ++++++-
 include/linux/pci_ids.h   | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 53fa19cf2f06..788c3559522d 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2602,7 +2602,12 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS200,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,	/* 135a.0811 */
 		pbn_b2_2_115200 },
-
+	/*
+	 * IntaShield IS-400
+	 */
+	{	PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,    /* 135a.0dc0 */
+		pbn_b2_4_115200 },
 	/*
 	 * Perle PCI-RAS cards
 	 */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 342b55afb0c3..9b940e644179 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1761,6 +1761,7 @@
 
 #define PCI_VENDOR_ID_INTASHIELD	0x135a
 #define PCI_DEVICE_ID_INTASHIELD_IS200	0x0d80
+#define PCI_DEVICE_ID_INTASHIELD_IS400	0x0dc0
 
 #define PCI_VENDOR_ID_QUATECH		0x135C
 #define PCI_DEVICE_ID_QUATECH_QSC100	0x0010
-- 
cgit v1.2.3


From 6c7c6afbb8c0e60d32a563cae7c6889211e9d9d8 Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Fri, 23 May 2008 13:04:29 -0700
Subject: types.h: don't expose struct ustat to userspace

<linux/types.h> can't be used together with <sys/ustat.h> because they
both define struct ustat:

    $ cat test.c
    #include <sys/ustat.h>
    #include <linux/types.h>
    $ gcc -c test.c
    In file included from test.c:2:
    /usr/include/linux/types.h:165: error: redefinition of 'struct ustat'

has been reported a while ago to debian, but seems to have been
lost in cat fighting: http://bugs.debian.org/429064

Signed-off-by: maximilian attems <max@stro.at>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 9dc2346627b4..d4a9ce6e2760 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -197,8 +197,6 @@ typedef u64 resource_size_t;
 typedef u32 resource_size_t;
 #endif
 
-#endif	/* __KERNEL__ */
-
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
@@ -206,4 +204,6 @@ struct ustat {
 	char			f_fpack[6];
 };
 
+#endif	/* __KERNEL__ */
+
 #endif /* _LINUX_TYPES_H */
-- 
cgit v1.2.3


From 80119ef5c8153e0a6cc5edf00c083dc98a9bd348 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 23 May 2008 13:04:31 -0700
Subject: mm: fix atomic_t overflow in vm

The atomic_t type is 32bit but a 64bit system can have more than 2^32
pages of virtual address space available.  Without this we overflow on
ludicrously large mappings

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_misc.c  | 2 +-
 include/linux/mman.h | 4 ++--
 mm/mmap.c            | 4 ++--
 mm/nommu.c           | 4 ++--
 mm/swap.c            | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 74a323d2b850..32dc14cd8900 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	si_meminfo(&i);
 	si_swapinfo(&i);
-	committed = atomic_read(&vm_committed_space);
+	committed = atomic_long_read(&vm_committed_space);
 	allowed = ((totalram_pages - hugetlb_total_pages())
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 87920a0852a3..dab8892e6ff1 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -17,14 +17,14 @@
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
-extern atomic_t vm_committed_space;
+extern atomic_long_t vm_committed_space;
 
 #ifdef CONFIG_SMP
 extern void vm_acct_memory(long pages);
 #else
 static inline void vm_acct_memory(long pages)
 {
-	atomic_add(pages, &vm_committed_space);
+	atomic_long_add(pages, &vm_committed_space);
 }
 #endif
 
diff --git a/mm/mmap.c b/mm/mmap.c
index fac66337da2a..669499e7c2f5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -177,7 +177,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	 * cast `allowed' as a signed long because vm_committed_space
 	 * sometimes has a negative value
 	 */
-	if (atomic_read(&vm_committed_space) < (long)allowed)
+	if (atomic_long_read(&vm_committed_space) < (long)allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index ef8c62cec697..dca93fcb8b7a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -39,7 +39,7 @@ struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
 unsigned long askedalloc, realalloc;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -1410,7 +1410,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	 * cast `allowed' as a signed long because vm_committed_space
 	 * sometimes has a negative value
 	 */
-	if (atomic_read(&vm_committed_space) < (long)allowed)
+	if (atomic_long_read(&vm_committed_space) < (long)allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
diff --git a/mm/swap.c b/mm/swap.c
index 91e194445a5e..45c9f25a8a3b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -503,7 +503,7 @@ void vm_acct_memory(long pages)
 	local = &__get_cpu_var(committed_space);
 	*local += pages;
 	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-		atomic_add(*local, &vm_committed_space);
+		atomic_long_add(*local, &vm_committed_space);
 		*local = 0;
 	}
 	preempt_enable();
@@ -520,7 +520,7 @@ static int cpu_swap_callback(struct notifier_block *nfb,
 
 	committed = &per_cpu(committed_space, (long)hcpu);
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		atomic_add(*committed, &vm_committed_space);
+		atomic_long_add(*committed, &vm_committed_space);
 		*committed = 0;
 		drain_cpu_pagevecs((long)hcpu);
 	}
-- 
cgit v1.2.3


From 03de250a269bfa8e6a9e6ccb4a1dbce19dae8a61 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Fri, 23 May 2008 13:04:33 -0700
Subject: md: proper extern for mdp_major

This patch adds a proper extern for mdp_major in include/linux/raid/md.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/raid/md.h | 2 ++
 init/do_mounts_md.c     | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 81a1a02d4566..b7386ae9d288 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -72,6 +72,8 @@
  */
 #define MD_PATCHLEVEL_VERSION           3
 
+extern int mdp_major;
+
 extern int register_md_personality (struct mdk_personality *p);
 extern int unregister_md_personality (struct mdk_personality *p);
 extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 7473b0c59d4d..693d24694a6c 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -24,7 +24,6 @@ static struct {
 
 static int md_setup_ents __initdata;
 
-extern int mdp_major;
 /*
  * Parse the command-line parameters given our kernel, but do not
  * actually try to invoke the MD device now; that is handled by
-- 
cgit v1.2.3


From 6bcfd601861cce45ca73ac1d714f1286b6b3f0d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 23 May 2008 13:04:34 -0700
Subject: md: kill file_path wrapper

Kill the trivial and rather pointless file_path wrapper around d_path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/bitmap.c         | 17 ++++-------------
 drivers/md/md.c             |  4 ++--
 include/linux/raid/bitmap.h |  1 -
 3 files changed, 6 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index c14dacdacfac..b26927ce889c 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
  * bitmap file handling - read and write the bitmap file and its superblock
  */
 
-/* copy the pathname of a file to a buffer */
-char *file_path(struct file *file, char *buf, int count)
-{
-	if (!buf)
-		return NULL;
-
-	buf = d_path(&file->f_path, buf, count);
-
-	return IS_ERR(buf) ? NULL : buf;
-}
-
 /*
  * basic page I/O operations
  */
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
 		if (bitmap->file) {
 			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
 			if (path)
-				ptr = file_path(bitmap->file, path, PAGE_SIZE);
+				ptr = d_path(&bitmap->file->f_path, path,
+					     PAGE_SIZE);
+
 
 			printk(KERN_ALERT
 			      "%s: kicking failed bitmap file %s from array!\n",
-			      bmname(bitmap), ptr ? ptr : "");
+			      bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
 
 			kfree(path);
 		} else
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b00137..d31aa6f33a6a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3987,8 +3987,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
 	if (!buf)
 		goto out;
 
-	ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
-	if (!ptr)
+	ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+	if (IS_ERR(ptr))
 		goto out;
 
 	strcpy(file->pathname, ptr);
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index 47fbcba11850..78bfdea24a8e 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -262,7 +262,6 @@ int  bitmap_create(mddev_t *mddev);
 void bitmap_flush(mddev_t *mddev);
 void bitmap_destroy(mddev_t *mddev);
 
-char *file_path(struct file *file, char *buf, int count);
 void bitmap_print_sb(struct bitmap *bitmap);
 void bitmap_update_sb(struct bitmap *bitmap);
 
-- 
cgit v1.2.3


From 90b08710e41a07d4ff0fb8940dcce3a552991a56 Mon Sep 17 00:00:00 2001
From: Bernd Schubert <bs@q-leap.de>
Date: Fri, 23 May 2008 13:04:38 -0700
Subject: md: allow parallel resync of md-devices.

In some configurations, a raid6 resync can be limited by CPU speed
(Calculating P and Q and moving data) rather than by device speed.  In
these cases there is nothing to be gained byt serialising resync of arrays
that share a device, and doing the resync in parallel can provide benefit.
 So add a sysfs tunable to flag an array as being allowed to resync in
parallel with other arrays that use (a different part of) the same device.

Signed-off-by: Bernd Schubert <bs@q-leap.de>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 40 ++++++++++++++++++++++++++++++++++++----
 include/linux/raid/md_k.h |  3 +++
 2 files changed, 39 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index e57213dacd25..295be1a68806 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
 
 static void md_print_devices(void);
 
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 /*
@@ -3012,6 +3014,36 @@ degraded_show(mddev_t *mddev, char *page)
 }
 static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
 
+static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	long n;
+
+	if (strict_strtol(buf, 10, &n))
+		return -EINVAL;
+
+	if (n != 0 && n != 1)
+		return -EINVAL;
+
+	mddev->parallel_resync = n;
+
+	if (mddev->sync_thread)
+		wake_up(&resync_wait);
+
+	return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+       sync_force_parallel_show, sync_force_parallel_store);
+
 static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
@@ -3187,6 +3219,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_min.attr,
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
+	&md_sync_force_parallel.attr,
 	&md_sync_completed.attr,
 	&md_max_sync.attr,
 	&md_suspend_lo.attr,
@@ -5487,8 +5520,6 @@ void md_allow_write(mddev_t *mddev)
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
 void md_do_sync(mddev_t *mddev)
@@ -5552,8 +5583,9 @@ void md_do_sync(mddev_t *mddev)
 		for_each_mddev(mddev2, tmp) {
 			if (mddev2 == mddev)
 				continue;
-			if (mddev2->curr_resync && 
-			    match_mddev_units(mddev,mddev2)) {
+			if (!mddev->parallel_resync
+			&&  mddev2->curr_resync
+			&&  match_mddev_units(mddev, mddev2)) {
 				DEFINE_WAIT(wq);
 				if (mddev < mddev2 && mddev->curr_resync == 2) {
 					/* arbitrarily yield */
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 812ffa590cff..a6d7ab688ede 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -180,6 +180,9 @@ struct mddev_s
 	int				sync_speed_min;
 	int				sync_speed_max;
 
+	/* resync even though the same disks are shared among md-devices */
+	int				parallel_resync;
+
 	int				ok_start_degraded;
 	/* recovery/resync flags 
 	 * NEEDED:   we might need to start a resync/recover
-- 
cgit v1.2.3


From dfc7064500061677720fa26352963c772d3ebe6b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 23 May 2008 13:04:39 -0700
Subject: md: restart recovery cleanly after device failure.

When we get any IO error during a recovery (rebuilding a spare), we abort
the recovery and restart it.

For RAID6 (and multi-drive RAID1) it may not be best to restart at the
beginning: when multiple failures can be tolerated, the recovery may be
able to continue and re-doing all that has already been done doesn't make
sense.

We already have the infrastructure to record where a recovery is up to
and restart from there, but it is not being used properly.
This is because:
  - We sometimes abort with MD_RECOVERY_ERR rather than just MD_RECOVERY_INTR,
    which causes the recovery not be be checkpointed.
  - We remove spares and then re-added them which loses important state
    information.

The distinction between MD_RECOVERY_ERR and MD_RECOVERY_INTR really isn't
needed.  If there is an error, the relevant drive will be marked as
Faulty, and that is enough to ensure correct handling of the error.  So we
first remove MD_RECOVERY_ERR, changing some of the uses of it to
MD_RECOVERY_INTR.

Then we cause the attempt to remove a non-faulty device from an array to
fail (unless recovery is impossible as the array is too degraded).  Then
when remove_and_add_spares attempts to remove the devices on which
recovery can continue, it will fail, they will remain in place, and
recovery will continue on them as desired.

Issue:  If we are halfway through rebuilding a spare and another drive
fails, and a new spare is immediately available,  do we want to:
 1/ complete the current rebuild, then go back and rebuild the new spare or
 2/ restart the rebuild from the start and rebuild both devices in
    parallel.

Both options can be argued for.  The code currently takes option 2 as
  a/ this requires least code change
  b/ this results in a minimally-degraded array in minimal time.

Cc: "Eivind Sarto" <ivan@kasenna.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 22 +++++++++++-----------
 drivers/md/multipath.c    |  3 ++-
 drivers/md/raid1.c        | 10 +++++++++-
 drivers/md/raid10.c       | 14 ++++++++++++--
 drivers/md/raid5.c        | 10 +++++++++-
 include/linux/raid/md_k.h |  4 +---
 6 files changed, 44 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 295be1a68806..51c19f86ff99 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5434,7 +5434,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
 	atomic_sub(blocks, &mddev->recovery_active);
 	wake_up(&mddev->recovery_wait);
 	if (!ok) {
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		// stop recovery, signal do_sync ....
 	}
@@ -5690,7 +5690,7 @@ void md_do_sync(mddev_t *mddev)
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
-			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto out;
 		}
 
@@ -5713,8 +5713,7 @@ void md_do_sync(mddev_t *mddev)
 
 		last_check = io_sectors;
 
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			break;
 
 	repeat:
@@ -5768,8 +5767,7 @@ void md_do_sync(mddev_t *mddev)
 	/* tell personality that we are finished */
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
-	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5838,7 +5836,10 @@ static int remove_and_add_spares(mddev_t *mddev)
 		}
 
 	if (mddev->degraded) {
-		rdev_for_each(rdev, rtmp, mddev)
+		rdev_for_each(rdev, rtmp, mddev) {
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(In_sync, &rdev->flags))
+				spares++;
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
@@ -5856,6 +5857,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 				} else
 					break;
 			}
+		}
 	}
 	return spares;
 }
@@ -5869,7 +5871,7 @@ static int remove_and_add_spares(mddev_t *mddev)
  * to do that as needed.
  * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
  * "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
  * and wakeups up this thread which will reap the thread and finish up.
  * This thread also removes any faulty devices (with nr_pending == 0).
  *
@@ -5944,8 +5946,7 @@ void md_check_recovery(mddev_t *mddev)
 			/* resync has finished, collect result */
 			md_unregister_thread(mddev->sync_thread);
 			mddev->sync_thread = NULL;
-			if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-			    !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 				/* success...*/
 				/* activate any spares */
 				mddev->pers->spare_active(mddev);
@@ -5969,7 +5970,6 @@ void md_check_recovery(mddev_t *mddev)
 		 * might be left set
 		 */
 		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-		clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 4f4d1f383842..e968116e0de9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
 	if (rdev) {
 		if (test_bit(In_sync, &rdev->flags) ||
 		    atomic_read(&rdev->nr_pending)) {
-			printk(KERN_ERR "hot-remove-disk, slot %d is identified"				" but is still operational!\n", number);
+			printk(KERN_ERR "hot-remove-disk, slot %d is identified"
+			       " but is still operational!\n", number);
 			err = -EBUSY;
 			goto abort;
 		}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d0f4021bbc2e..c610b947218a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	} else
 		set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1148,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove non-faulty devices is recovery
+		 * is not possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    mddev->degraded < conf->raid_disks) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8536ede1e712..1de17da34a95 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove faulty devices in recovery
+		 * is not possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    enough(conf)) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {
@@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error)
 
 	if (!uptodate)
 		md_error(mddev, conf->mirrors[d].rdev);
+
 	update_head_pos(i, r10_bio);
 
 	while (atomic_dec_and_test(&r10_bio->remaining)) {
@@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 					if (rb2)
 						atomic_dec(&rb2->remaining);
 					r10_bio = rb2;
-					if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
+					if (!test_and_set_bit(MD_RECOVERY_INTR,
+							      &mddev->recovery))
 						printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
 						       mdname(mddev));
 					break;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2f28745dacf9..425958a76b84 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1268,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 			/*
 			 * if recovery was running, make sure it aborts.
 			 */
-			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		}
 		set_bit(Faulty, &rdev->flags);
 		printk (KERN_ALERT
@@ -4574,6 +4574,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove non-faulty devices if recovery
+		 * isn't possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    mddev->degraded <= conf->max_degraded) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a6d7ab688ede..3dea9f545c8f 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -188,8 +188,7 @@ struct mddev_s
 	 * NEEDED:   we might need to start a resync/recover
 	 * RUNNING:  a thread is running, or about to be started
 	 * SYNC:     actually doing a resync, not a recovery
-	 * ERR:      and IO error was detected - abort the resync/recovery
-	 * INTR:     someone requested a (clean) early abort.
+	 * INTR:     resync needs to be aborted for some reason
 	 * DONE:     thread is done and is waiting to be reaped
 	 * REQUEST:  user-space has requested a sync (used with SYNC)
 	 * CHECK:    user-space request for for check-only, no repair
@@ -199,7 +198,6 @@ struct mddev_s
 	 */
 #define	MD_RECOVERY_RUNNING	0
 #define	MD_RECOVERY_SYNC	1
-#define	MD_RECOVERY_ERR		2
 #define	MD_RECOVERY_INTR	3
 #define	MD_RECOVERY_DONE	4
 #define	MD_RECOVERY_NEEDED	5
-- 
cgit v1.2.3


From cdc83ae2453ddb19060e05e6afd22b1254128c42 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-fbdev@fluff.org>
Date: Fri, 23 May 2008 13:04:53 -0700
Subject: SM501: reverse FPEN/VBIASEN flags behaviour

To keep backwards compatibility, reverse the meanings of these flags so
that when they are not set, the driver uses the original behvaiour.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Cc: Arnaud Patard <arnaud.patard@rtp-net.org>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/sm501fb.c | 8 ++++----
 include/linux/sm501.h   | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 742b5c656d66..15d4a768b1f6 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -663,14 +663,14 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 		sm501fb_sync_regs(fbi);
 		mdelay(10);
 
-		if (pd->flags & SM501FB_FLAG_PANEL_USE_VBIASEN) {
+		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
 			control |= SM501_DC_PANEL_CONTROL_BIAS;	/* VBIASEN */
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
-		if (pd->flags & SM501FB_FLAG_PANEL_USE_FPEN) {
+		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
 			control |= SM501_DC_PANEL_CONTROL_FPEN;
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
@@ -678,14 +678,14 @@ static void sm501fb_panel_power(struct sm501fb_info *fbi, int to)
 		}
 	} else if (!to && (control & SM501_DC_PANEL_CONTROL_VDD) != 0) {
 		/* disable panel power */
-		if (pd->flags & SM501FB_FLAG_PANEL_USE_FPEN) {
+		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_FPEN)) {
 			control &= ~SM501_DC_PANEL_CONTROL_FPEN;
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
 			mdelay(10);
 		}
 
-		if (pd->flags & SM501FB_FLAG_PANEL_USE_VBIASEN) {
+		if (!(pd->flags & SM501FB_FLAG_PANEL_NO_VBIASEN)) {
 			control &= ~SM501_DC_PANEL_CONTROL_BIAS;
 			writel(control, ctrl_reg);
 			sm501fb_sync_regs(fbi);
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index bca134544700..95c1c39ba445 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -71,8 +71,8 @@ extern unsigned long sm501_gpio_get(struct device *dev,
 #define SM501FB_FLAG_DISABLE_AT_EXIT	(1<<1)
 #define SM501FB_FLAG_USE_HWCURSOR	(1<<2)
 #define SM501FB_FLAG_USE_HWACCEL	(1<<3)
-#define SM501FB_FLAG_PANEL_USE_FPEN	(1<<4)
-#define SM501FB_FLAG_PANEL_USE_VBIASEN	(1<<5)
+#define SM501FB_FLAG_PANEL_NO_FPEN	(1<<4)
+#define SM501FB_FLAG_PANEL_NO_VBIASEN	(1<<5)
 
 struct sm501_platdata_fbsub {
 	struct fb_videomode	*def_mode;
-- 
cgit v1.2.3


From 6ea0205b56546cef782b74d9f4664ec00290a6ae Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 23 May 2008 13:04:58 -0700
Subject: gpio: build fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes various gpio-related build errors (mostly potential)
reported in part by Russell King and Uwe Kleine-König.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Arnaud Patard <arnaud.patard@rtp-net.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/gpio.h | 6 +++++-
 include/linux/gpio.h       | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index ecf675a59d21..6be061d09da9 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -1,8 +1,12 @@
 #ifndef _ASM_GENERIC_GPIO_H
 #define _ASM_GENERIC_GPIO_H
 
+#include <linux/types.h>
+
 #ifdef CONFIG_HAVE_GPIO_LIB
 
+#include <linux/compiler.h>
+
 /* Platforms may implement their GPIO interface with library code,
  * at a small performance cost for non-inlined operations and some
  * extra memory (for code and for per-GPIO table entries).
@@ -74,7 +78,7 @@ struct gpio_chip {
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 			unsigned offset);
-extern int __init __must_check gpiochip_reserve(int start, int ngpio);
+extern int __must_check gpiochip_reserve(int start, int ngpio);
 
 /* add/remove chips */
 extern int gpiochip_add(struct gpio_chip *chip);
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 4987a84078ef..98be6c5762b9 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -8,6 +8,9 @@
 
 #else
 
+#include <linux/types.h>
+#include <linux/errno.h>
+
 /*
  * Some platforms don't support the GPIO programming interface.
  *
-- 
cgit v1.2.3


From 12d15f0d51d47cec39d1d7250e81573c5cbd8b5d Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Fri, 23 May 2008 13:05:01 -0700
Subject: for_each_online_pgdat(): kerneldoc fix

for_each_pgdat() was renamed to for_each_online_pgdat() and kerneldoc
comments should be updated accordingly.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c463cd8a15a4..443bc7cd8c62 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -703,7 +703,7 @@ extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
 extern struct zone *next_zone(struct zone *zone);
 
 /**
- * for_each_pgdat - helper macro to iterate over all nodes
+ * for_each_online_pgdat - helper macro to iterate over all online nodes
  * @pgdat - pointer to a pg_data_t variable
  */
 #define for_each_online_pgdat(pgdat)			\
-- 
cgit v1.2.3


From 2548baa07ddf37ea8604e9627f042616d1cdc43e Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 26 May 2008 16:08:40 +0200
Subject: i2c: Align i2c_device_id

Align i2c_device_id.driver_data to 8 bytes to not fail on crossbuilds.

(Added in d2653e92732bd3911feff6bee5e23dbf959381db.)

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/mod_devicetable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index d73eceaa7afb..69b2342d5ebb 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -375,7 +375,8 @@ struct virtio_device_id {
 
 struct i2c_device_id {
 	char name[I2C_NAME_SIZE];
-	kernel_ulong_t driver_data;	/* Data private to the driver */
+	kernel_ulong_t driver_data	/* Data private to the driver */
+			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
 
-- 
cgit v1.2.3


From 624080eded68738daee041ad64672a9d2614754f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 6 Jun 2008 17:50:40 -0400
Subject: jbd2: If a journal checksum error is detected, propagate the error to
 ext4

If a journal checksum error is detected, the ext4 filesystem will call
ext4_error(), and the mount will either continue, become a read-only
mount, or cause a kernel panic based on the superblock flags
indicating the user's preference of what to do in case of filesystem
corruption being detected.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c      | 23 +++++++++++++++++++++++
 fs/jbd2/recovery.c   | 11 ++++-------
 include/linux/jbd2.h |  3 +++
 3 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 09d9359c8055..d01a32e8b50a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2189,6 +2189,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
 		if (ext4_load_journal(sb, es, journal_devnum))
 			goto failed_mount3;
+		if (!(sb->s_flags & MS_RDONLY) &&
+		    EXT4_SB(sb)->s_journal->j_failed_commit) {
+			printk(KERN_CRIT "EXT4-fs error (device %s): "
+			       "ext4_fill_super: Journal transaction "
+			       "%u is corrupt\n", sb->s_id, 
+			       EXT4_SB(sb)->s_journal->j_failed_commit);
+			if (test_opt (sb, ERRORS_RO)) {
+				printk (KERN_CRIT
+					"Mounting filesystem read-only\n");
+				sb->s_flags |= MS_RDONLY;
+				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+			}
+			if (test_opt(sb, ERRORS_PANIC)) {
+				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+				ext4_commit_super(sb, es, 1);
+				printk(KERN_CRIT
+				       "EXT4-fs (device %s): mount failed\n",
+				      sb->s_id);
+				goto failed_mount4;
+			}
+		}
 	} else if (journal_inum) {
 		if (ext4_create_journal(sb, es, journal_inum))
 			goto failed_mount3;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 7199db52b2fd..058f50f65b76 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -611,9 +611,8 @@ static int do_one_pass(journal_t *journal,
 				chksum_err = chksum_seen = 0;
 
 				if (info->end_transaction) {
-					printk(KERN_ERR "JBD: Transaction %u "
-						"found to be corrupt.\n",
-						next_commit_ID - 1);
+					journal->j_failed_commit =
+						info->end_transaction;
 					brelse(bh);
 					break;
 				}
@@ -644,10 +643,8 @@ static int do_one_pass(journal_t *journal,
 
 					if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
 					   JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
-						printk(KERN_ERR
-						       "JBD: Transaction %u "
-						       "found to be corrupt.\n",
-						       next_commit_ID);
+						journal->j_failed_commit =
+							next_commit_ID;
 						brelse(bh);
 						break;
 					}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 05e2b307161a..d147f0f90360 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -919,6 +919,9 @@ struct journal_s
 	struct proc_dir_entry	*j_proc_entry;
 	struct transaction_stats_s j_stats;
 
+	/* Failed journal commit ID */
+	unsigned int		j_failed_commit;
+
 	/*
 	 * An opaque pointer to fs-private information.  ext3 puts its
 	 * superblock pointer here
-- 
cgit v1.2.3


From cbaffba12ce08beb3e80bfda148ee0fa14aac188 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 26 May 2008 20:55:42 +0400
Subject: posix timers: discard SI_TIMER signals on exec

Based on Roland's patch. This approach was suggested by Austin Clements
from the very beginning, and then by Linus.

As Austin pointed out, the execing task can be killed by SI_TIMER signal
because exec flushes the signal handlers, but doesn't discard the pending
signals generated by posix timers. Perhaps not a bug, but people find this
surprising. See http://bugzilla.kernel.org/show_bug.cgi?id=10460

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Austin Clements <amdragon+kernelbugzilla@mit.edu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c             |  1 +
 include/linux/sched.h |  2 ++
 kernel/signal.c       | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 3c2ba7ce11d4..9448f1b50b4a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -860,6 +860,7 @@ static int de_thread(struct task_struct *tsk)
 
 no_thread_group:
 	exit_itimers(sig);
+	flush_itimer_signals();
 	if (leader)
 		release_task(leader);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5395a6176f4b..3e05e5474749 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1848,7 +1848,9 @@ extern void exit_thread(void);
 extern void exit_files(struct task_struct *);
 extern void __cleanup_signal(struct signal_struct *);
 extern void __cleanup_sighand(struct sighand_struct *);
+
 extern void exit_itimers(struct signal_struct *);
+extern void flush_itimer_signals(void);
 
 extern NORET_TYPE void do_group_exit(int);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 2955f6c4f36e..6c0958e52ea7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -231,6 +231,40 @@ void flush_signals(struct task_struct *t)
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 }
 
+static void __flush_itimer_signals(struct sigpending *pending)
+{
+	sigset_t signal, retain;
+	struct sigqueue *q, *n;
+
+	signal = pending->signal;
+	sigemptyset(&retain);
+
+	list_for_each_entry_safe(q, n, &pending->list, list) {
+		int sig = q->info.si_signo;
+
+		if (likely(q->info.si_code != SI_TIMER)) {
+			sigaddset(&retain, sig);
+		} else {
+			sigdelset(&signal, sig);
+			list_del_init(&q->list);
+			__sigqueue_free(q);
+		}
+	}
+
+	sigorsets(&pending->signal, &signal, &retain);
+}
+
+void flush_itimer_signals(void)
+{
+	struct task_struct *tsk = current;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tsk->sighand->siglock, flags);
+	__flush_itimer_signals(&tsk->pending);
+	__flush_itimer_signals(&tsk->signal->shared_pending);
+	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+}
+
 void ignore_signals(struct task_struct *t)
 {
 	int i;
-- 
cgit v1.2.3


From 43f83a8f9963a11a9c3f41beecc363da21ae3602 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 27 May 2008 01:37:26 -0400
Subject: Input: wm9713 - support five wire panels

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/wm9713.c | 22 ++++++++++++++++++++++
 include/linux/wm97xx.h             |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c
index 01278bd7e65c..838458792ea0 100644
--- a/drivers/input/touchscreen/wm9713.c
+++ b/drivers/input/touchscreen/wm9713.c
@@ -84,6 +84,15 @@ static int delay = 4;
 module_param(delay, int, 0);
 MODULE_PARM_DESC(delay, "Set adc sample delay.");
 
+/*
+ * Set five_wire = 1 to use a 5 wire touchscreen.
+ *
+ * NOTE: Five wire mode does not allow for readback of pressure.
+ */
+static int five_wire;
+module_param(five_wire, int, 0);
+MODULE_PARM_DESC(five_wire, "Set to '1' to use 5-wire touchscreen.");
+
 /*
  * Set adc mask function.
  *
@@ -162,6 +171,19 @@ static void wm9713_phy_init(struct wm97xx *wm)
 			 64000 / rpu);
 	}
 
+	/* Five wire panel? */
+	if (five_wire) {
+		dig3 |= WM9713_45W;
+		dev_info(wm->dev, "setting 5-wire touchscreen mode.");
+
+		if (pil) {
+			dev_warn(wm->dev,
+				 "Pressure measurement not supported in 5 "
+				 "wire mode, disabling\n");
+			pil = 0;
+		}
+	}
+
 	/* touchpanel pressure */
 	if (pil == 2) {
 		dig3 |= WM9712_PIL;
diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h
index 4d13732e9cf0..6f69968eab24 100644
--- a/include/linux/wm97xx.h
+++ b/include/linux/wm97xx.h
@@ -100,6 +100,7 @@
 #define WM9713_ADCSEL_Y		0x0004	/* Y measurement */
 #define WM9713_ADCSEL_PRES	0x0008	/* Pressure measurement */
 #define WM9713_COO		0x0001	/* enable coordinate mode */
+#define WM9713_45W		0x1000  /* set for 5 wire panel */
 #define WM9713_PDEN		0x0800	/* measure only when pen down */
 #define WM9713_ADCSEL_MASK	0x00fe	/* ADC selection mask */
 #define WM9713_WAIT		0x0200	/* coordinate wait */
-- 
cgit v1.2.3


From 9d5f09a424a67ddb959829894efb4c71cbf6d600 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Tue, 27 May 2008 14:54:41 +0200
Subject: Added in MESSAGE notes for blktraces

Allows messages to be inserted into blktrace streams.

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blktrace.c             | 14 ++++++++++++++
 include/linux/blktrace_api.h | 25 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/block/blktrace.c b/block/blktrace.c
index b2cbb4e5d767..20e11f354f11 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -75,6 +75,20 @@ static void trace_note_time(struct blk_trace *bt)
 	local_irq_restore(flags);
 }
 
+void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
+{
+	int n;
+	va_list args;
+	static char bt_msg_buf[BLK_TN_MAX_MSG];
+
+	va_start(args, fmt);
+	n = vscnprintf(bt_msg_buf, BLK_TN_MAX_MSG, fmt, args);
+	va_end(args);
+
+	trace_note(bt, 0, BLK_TN_MESSAGE, bt_msg_buf, n);
+}
+EXPORT_SYMBOL_GPL(__trace_note_message);
+
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 			 pid_t pid)
 {
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cfc3147e5cf9..b7cd8f1eedbe 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -55,6 +55,7 @@ enum blktrace_act {
 enum blktrace_notify {
 	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
 	__BLK_TN_TIMESTAMP,		/* include system clock */
+	__BLK_TN_MESSAGE,		/* Character string message */
 };
 
 
@@ -79,6 +80,7 @@ enum blktrace_notify {
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_MESSAGE		(__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
 
 #define BLK_IO_TRACE_MAGIC	0x65617400
 #define BLK_IO_TRACE_VERSION	0x07
@@ -149,7 +151,28 @@ extern void blk_trace_shutdown(struct request_queue *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
 extern int do_blk_trace_setup(struct request_queue *q,
 	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
+/**
+ * blk_add_trace_msg - Add a (simple) message to the blktrace stream
+ * @q:		queue the io is for
+ * @fmt:	format to print message in
+ * args...	Variable argument list for format
+ *
+ * Description:
+ *     Records a (simple) message onto the blktrace stream.
+ *
+ *     NOTE: BLK_TN_MAX_MSG characters are output at most.
+ *     NOTE: Can not use 'static inline' due to presence of var args...
+ *
+ **/
+#define blk_add_trace_msg(q, fmt, ...)					\
+	do {								\
+		struct blk_trace *bt = (q)->blk_trace;			\
+		if (unlikely(bt))					\
+			__trace_note_message(bt, fmt, ##__VA_ARGS__);	\
+	} while (0)
+#define BLK_TN_MAX_MSG		1024
 
 /**
  * blk_add_trace_rq - Add a trace for a request oriented action
@@ -299,6 +322,8 @@ extern int blk_trace_remove(struct request_queue *q);
 #define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
 #define blk_trace_startstop(q, start)		(-ENOTTY)
 #define blk_trace_remove(q)			(-ENOTTY)
+#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 #endif /* __KERNEL__ */
 #endif
-- 
cgit v1.2.3


From 64565911cdb57c2f512a9715b985b5617402cc67 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 28 May 2008 14:45:33 +0200
Subject: block: make blktrace use per-cpu buffers for message notes

Currently it uses a single static char array, but that risks
being corrupted when multiple users issue message notes at the
same time. Make the buffers dynamically allocated when the trace
is setup and make them per-cpu instead.

The default max message size of 1k is also very large, the
interface is mainly for small text notes. So shrink it to 128 bytes.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blktrace.c             | 15 ++++++++++++---
 include/linux/blktrace_api.h |  3 ++-
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blktrace.c b/block/blktrace.c
index 20e11f354f11..7ae87cc4a163 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -79,13 +79,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 {
 	int n;
 	va_list args;
-	static char bt_msg_buf[BLK_TN_MAX_MSG];
+	char *buf;
 
+	preempt_disable();
+	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
 	va_start(args, fmt);
-	n = vscnprintf(bt_msg_buf, BLK_TN_MAX_MSG, fmt, args);
+	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
 	va_end(args);
 
-	trace_note(bt, 0, BLK_TN_MESSAGE, bt_msg_buf, n);
+	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(__trace_note_message);
 
@@ -246,6 +249,7 @@ static void blk_trace_cleanup(struct blk_trace *bt)
 	debugfs_remove(bt->dropped_file);
 	blk_remove_tree(bt->dir);
 	free_percpu(bt->sequence);
+	free_percpu(bt->msg_data);
 	kfree(bt);
 }
 
@@ -360,6 +364,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (!bt->sequence)
 		goto err;
 
+	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
+	if (!bt->msg_data)
+		goto err;
+
 	ret = -ENOENT;
 	dir = blk_create_tree(buts->name);
 	if (!dir)
@@ -406,6 +414,7 @@ err:
 		if (bt->dropped_file)
 			debugfs_remove(bt->dropped_file);
 		free_percpu(bt->sequence);
+		free_percpu(bt->msg_data);
 		if (bt->rchan)
 			relay_close(bt->rchan);
 		kfree(bt);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index b7cd8f1eedbe..e3ef903aae88 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -121,6 +121,7 @@ struct blk_trace {
 	int trace_state;
 	struct rchan *rchan;
 	unsigned long *sequence;
+	unsigned char *msg_data;
 	u16 act_mask;
 	u64 start_lba;
 	u64 end_lba;
@@ -172,7 +173,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 		if (unlikely(bt))					\
 			__trace_note_message(bt, fmt, ##__VA_ARGS__);	\
 	} while (0)
-#define BLK_TN_MAX_MSG		1024
+#define BLK_TN_MAX_MSG		128
 
 /**
  * blk_add_trace_rq - Add a trace for a request oriented action
-- 
cgit v1.2.3


From 6363ca57c76b7b83639ca8c83fc285fa26a7880e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 29 May 2008 11:28:57 +0200
Subject: revert ("sched: fair-group: SMP-nice for group scheduling")

Yanmin Zhang reported:

Comparing with 2.6.25, volanoMark has big regression with kernel 2.6.26-rc1.
It's about 50% on my 8-core stoakley, 16-core tigerton, and Itanium Montecito.

With bisect, I located the following patch:

| 18d95a2832c1392a2d63227a7a6d433cb9f2037e is first bad commit
| commit 18d95a2832c1392a2d63227a7a6d433cb9f2037e
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date:   Sat Apr 19 19:45:00 2008 +0200
|
|     sched: fair-group: SMP-nice for group scheduling

Revert it so that we get v2.6.25 behavior.

Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   1 -
 kernel/sched.c        | 430 ++++----------------------------------------------
 kernel/sched_debug.c  |   5 -
 kernel/sched_fair.c   | 124 ++++++---------
 kernel/sched_rt.c     |   4 -
 5 files changed, 75 insertions(+), 489 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5395a6176f4b..8a888499954e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -766,7 +766,6 @@ struct sched_domain {
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
 	cpumask_t span;			/* span of all CPUs in this domain */
-	int first_cpu;			/* cache of the first cpu in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
diff --git a/kernel/sched.c b/kernel/sched.c
index 3dc13f05b10e..bfb8ad8ed171 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -398,43 +398,6 @@ struct cfs_rq {
 	 */
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
-
-#ifdef CONFIG_SMP
-	unsigned long task_weight;
-	unsigned long shares;
-	/*
-	 * We need space to build a sched_domain wide view of the full task
-	 * group tree, in order to avoid depending on dynamic memory allocation
-	 * during the load balancing we place this in the per cpu task group
-	 * hierarchy. This limits the load balancing to one instance per cpu,
-	 * but more should not be needed anyway.
-	 */
-	struct aggregate_struct {
-		/*
-		 *   load = weight(cpus) * f(tg)
-		 *
-		 * Where f(tg) is the recursive weight fraction assigned to
-		 * this group.
-		 */
-		unsigned long load;
-
-		/*
-		 * part of the group weight distributed to this span.
-		 */
-		unsigned long shares;
-
-		/*
-		 * The sum of all runqueue weights within this span.
-		 */
-		unsigned long rq_weight;
-
-		/*
-		 * Weight contributed by tasks; this is the part we can
-		 * influence by moving tasks around.
-		 */
-		unsigned long task_weight;
-	} aggregate;
-#endif
 #endif
 };
 
@@ -1508,326 +1471,6 @@ static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
 static unsigned long cpu_avg_load_per_task(int cpu);
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Group load balancing.
- *
- * We calculate a few balance domain wide aggregate numbers; load and weight.
- * Given the pictures below, and assuming each item has equal weight:
- *
- *         root          1 - thread
- *         / | \         A - group
- *        A  1  B
- *       /|\   / \
- *      C 2 D 3   4
- *      |   |
- *      5   6
- *
- * load:
- *    A and B get 1/3-rd of the total load. C and D get 1/3-rd of A's 1/3-rd,
- *    which equals 1/9-th of the total load.
- *
- * shares:
- *    The weight of this group on the selected cpus.
- *
- * rq_weight:
- *    Direct sum of all the cpu's their rq weight, e.g. A would get 3 while
- *    B would get 2.
- *
- * task_weight:
- *    Part of the rq_weight contributed by tasks; all groups except B would
- *    get 1, B gets 2.
- */
-
-static inline struct aggregate_struct *
-aggregate(struct task_group *tg, struct sched_domain *sd)
-{
-	return &tg->cfs_rq[sd->first_cpu]->aggregate;
-}
-
-typedef void (*aggregate_func)(struct task_group *, struct sched_domain *);
-
-/*
- * Iterate the full tree, calling @down when first entering a node and @up when
- * leaving it for the final time.
- */
-static
-void aggregate_walk_tree(aggregate_func down, aggregate_func up,
-			 struct sched_domain *sd)
-{
-	struct task_group *parent, *child;
-
-	rcu_read_lock();
-	parent = &root_task_group;
-down:
-	(*down)(parent, sd);
-	list_for_each_entry_rcu(child, &parent->children, siblings) {
-		parent = child;
-		goto down;
-
-up:
-		continue;
-	}
-	(*up)(parent, sd);
-
-	child = parent;
-	parent = parent->parent;
-	if (parent)
-		goto up;
-	rcu_read_unlock();
-}
-
-/*
- * Calculate the aggregate runqueue weight.
- */
-static
-void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd)
-{
-	unsigned long rq_weight = 0;
-	unsigned long task_weight = 0;
-	int i;
-
-	for_each_cpu_mask(i, sd->span) {
-		rq_weight += tg->cfs_rq[i]->load.weight;
-		task_weight += tg->cfs_rq[i]->task_weight;
-	}
-
-	aggregate(tg, sd)->rq_weight = rq_weight;
-	aggregate(tg, sd)->task_weight = task_weight;
-}
-
-/*
- * Compute the weight of this group on the given cpus.
- */
-static
-void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd)
-{
-	unsigned long shares = 0;
-	int i;
-
-	for_each_cpu_mask(i, sd->span)
-		shares += tg->cfs_rq[i]->shares;
-
-	if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares)
-		shares = tg->shares;
-
-	aggregate(tg, sd)->shares = shares;
-}
-
-/*
- * Compute the load fraction assigned to this group, relies on the aggregate
- * weight and this group's parent's load, i.e. top-down.
- */
-static
-void aggregate_group_load(struct task_group *tg, struct sched_domain *sd)
-{
-	unsigned long load;
-
-	if (!tg->parent) {
-		int i;
-
-		load = 0;
-		for_each_cpu_mask(i, sd->span)
-			load += cpu_rq(i)->load.weight;
-
-	} else {
-		load = aggregate(tg->parent, sd)->load;
-
-		/*
-		 * shares is our weight in the parent's rq so
-		 * shares/parent->rq_weight gives our fraction of the load
-		 */
-		load *= aggregate(tg, sd)->shares;
-		load /= aggregate(tg->parent, sd)->rq_weight + 1;
-	}
-
-	aggregate(tg, sd)->load = load;
-}
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void
-__update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd,
-			  int tcpu)
-{
-	int boost = 0;
-	unsigned long shares;
-	unsigned long rq_weight;
-
-	if (!tg->se[tcpu])
-		return;
-
-	rq_weight = tg->cfs_rq[tcpu]->load.weight;
-
-	/*
-	 * If there are currently no tasks on the cpu pretend there is one of
-	 * average load so that when a new task gets to run here it will not
-	 * get delayed by group starvation.
-	 */
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	/*
-	 *           \Sum shares * rq_weight
-	 * shares =  -----------------------
-	 *               \Sum rq_weight
-	 *
-	 */
-	shares = aggregate(tg, sd)->shares * rq_weight;
-	shares /= aggregate(tg, sd)->rq_weight + 1;
-
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[tcpu]->shares = boost ? 0 : shares;
-
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
-
-	__set_se_shares(tg->se[tcpu], shares);
-}
-
-/*
- * Re-adjust the weights on the cpu the task came from and on the cpu the
- * task went to.
- */
-static void
-__move_group_shares(struct task_group *tg, struct sched_domain *sd,
-		    int scpu, int dcpu)
-{
-	unsigned long shares;
-
-	shares = tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares;
-
-	__update_group_shares_cpu(tg, sd, scpu);
-	__update_group_shares_cpu(tg, sd, dcpu);
-
-	/*
-	 * ensure we never loose shares due to rounding errors in the
-	 * above redistribution.
-	 */
-	shares -= tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares;
-	if (shares)
-		tg->cfs_rq[dcpu]->shares += shares;
-}
-
-/*
- * Because changing a group's shares changes the weight of the super-group
- * we need to walk up the tree and change all shares until we hit the root.
- */
-static void
-move_group_shares(struct task_group *tg, struct sched_domain *sd,
-		  int scpu, int dcpu)
-{
-	while (tg) {
-		__move_group_shares(tg, sd, scpu, dcpu);
-		tg = tg->parent;
-	}
-}
-
-static
-void aggregate_group_set_shares(struct task_group *tg, struct sched_domain *sd)
-{
-	unsigned long shares = aggregate(tg, sd)->shares;
-	int i;
-
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, sd, i);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
-
-	aggregate_group_shares(tg, sd);
-
-	/*
-	 * ensure we never loose shares due to rounding errors in the
-	 * above redistribution.
-	 */
-	shares -= aggregate(tg, sd)->shares;
-	if (shares) {
-		tg->cfs_rq[sd->first_cpu]->shares += shares;
-		aggregate(tg, sd)->shares += shares;
-	}
-}
-
-/*
- * Calculate the accumulative weight and recursive load of each task group
- * while walking down the tree.
- */
-static
-void aggregate_get_down(struct task_group *tg, struct sched_domain *sd)
-{
-	aggregate_group_weight(tg, sd);
-	aggregate_group_shares(tg, sd);
-	aggregate_group_load(tg, sd);
-}
-
-/*
- * Rebalance the cpu shares while walking back up the tree.
- */
-static
-void aggregate_get_up(struct task_group *tg, struct sched_domain *sd)
-{
-	aggregate_group_set_shares(tg, sd);
-}
-
-static DEFINE_PER_CPU(spinlock_t, aggregate_lock);
-
-static void __init init_aggregate(void)
-{
-	int i;
-
-	for_each_possible_cpu(i)
-		spin_lock_init(&per_cpu(aggregate_lock, i));
-}
-
-static int get_aggregate(struct sched_domain *sd)
-{
-	if (!spin_trylock(&per_cpu(aggregate_lock, sd->first_cpu)))
-		return 0;
-
-	aggregate_walk_tree(aggregate_get_down, aggregate_get_up, sd);
-	return 1;
-}
-
-static void put_aggregate(struct sched_domain *sd)
-{
-	spin_unlock(&per_cpu(aggregate_lock, sd->first_cpu));
-}
-
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-	cfs_rq->shares = shares;
-}
-
-#else
-
-static inline void init_aggregate(void)
-{
-}
-
-static inline int get_aggregate(struct sched_domain *sd)
-{
-	return 0;
-}
-
-static inline void put_aggregate(struct sched_domain *sd)
-{
-}
-#endif
-
 #else /* CONFIG_SMP */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1848,14 +1491,26 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 
 #define sched_class_highest (&rt_sched_class)
 
-static void inc_nr_running(struct rq *rq)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_add(&rq->load, p->se.load.weight);
+}
+
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_sub(&rq->load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
+	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct rq *rq)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
+	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -1947,7 +1602,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, wakeup);
-	inc_nr_running(rq);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -1959,7 +1614,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, sleep);
-	dec_nr_running(rq);
+	dec_nr_running(p, rq);
 }
 
 /**
@@ -2612,7 +2267,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * management (if any):
 		 */
 		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
+		inc_nr_running(p, rq);
 	}
 	check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
@@ -3603,12 +3258,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	unsigned long imbalance;
 	struct rq *busiest;
 	unsigned long flags;
-	int unlock_aggregate;
 
 	cpus_setall(*cpus);
 
-	unlock_aggregate = get_aggregate(sd);
-
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
 	 * sibling can pick up load irrespective of busy siblings. In this case,
@@ -3724,9 +3376,8 @@ redo:
 
 	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		ld_moved = -1;
-
-	goto out;
+		return -1;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[idle]);
@@ -3741,13 +3392,8 @@ out_one_pinned:
 
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		ld_moved = -1;
-	else
-		ld_moved = 0;
-out:
-	if (unlock_aggregate)
-		put_aggregate(sd);
-	return ld_moved;
+		return -1;
+	return 0;
 }
 
 /*
@@ -4934,8 +4580,10 @@ void set_user_nice(struct task_struct *p, long nice)
 		goto out_unlock;
 	}
 	on_rq = p->se.on_rq;
-	if (on_rq)
+	if (on_rq) {
 		dequeue_task(rq, p, 0);
+		dec_load(rq, p);
+	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -4945,6 +4593,7 @@ void set_user_nice(struct task_struct *p, long nice)
 
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
+		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -7319,7 +6968,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
 			sd->span = *cpu_map;
-			sd->first_cpu = first_cpu(sd->span);
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
@@ -7330,7 +6978,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
 		sched_domain_node_span(cpu_to_node(i), &sd->span);
-		sd->first_cpu = first_cpu(sd->span);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7342,7 +6989,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
 		sd->span = *nodemask;
-		sd->first_cpu = first_cpu(sd->span);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7354,7 +7000,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
 		sd->span = cpu_coregroup_map(i);
-		sd->first_cpu = first_cpu(sd->span);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
@@ -7367,7 +7012,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
 		sd->span = per_cpu(cpu_sibling_map, i);
-		sd->first_cpu = first_cpu(sd->span);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
@@ -8037,7 +7681,6 @@ void __init sched_init(void)
 	}
 
 #ifdef CONFIG_SMP
-	init_aggregate();
 	init_defrootdomain();
 #endif
 
@@ -8602,11 +8245,14 @@ void sched_move_task(struct task_struct *tsk)
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void __set_se_shares(struct sched_entity *se, unsigned long shares)
+static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
 	struct cfs_rq *cfs_rq = se->cfs_rq;
+	struct rq *rq = cfs_rq->rq;
 	int on_rq;
 
+	spin_lock_irq(&rq->lock);
+
 	on_rq = se->on_rq;
 	if (on_rq)
 		dequeue_entity(cfs_rq, se, 0);
@@ -8616,17 +8262,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
-}
 
-static void set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-	struct cfs_rq *cfs_rq = se->cfs_rq;
-	struct rq *rq = cfs_rq->rq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rq->lock, flags);
-	__set_se_shares(se, shares);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	spin_unlock_irq(&rq->lock);
 }
 
 static DEFINE_MUTEX(shares_mutex);
@@ -8665,13 +8302,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * w/o tripping rebalance_share or load_balance_fair.
 	 */
 	tg->shares = shares;
-	for_each_possible_cpu(i) {
-		/*
-		 * force a rebalance
-		 */
-		cfs_rq_set_shares(tg->cfs_rq[i], 0);
+	for_each_possible_cpu(i)
 		set_se_shares(tg->se[i], shares);
-	}
 
 	/*
 	 * Enable load balance activity on this group, by inserting it back on
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5f06118fbc31..8bb713040ac9 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -167,11 +167,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #endif
 	SEQ_printf(m, "  .%-30s: %ld\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_SMP
-	SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
-#endif
-#endif
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0eb0ae879542..f0f25fc12d0a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-	cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
 	list_add(&se->group_node, &cfs_rq->tasks);
@@ -540,10 +523,6 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, -se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
 	list_del_init(&se->group_node);
@@ -1327,90 +1306,75 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
 
-static unsigned long
-__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move, struct sched_domain *sd,
-		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
-		struct cfs_rq *cfs_rq)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 {
-	struct rq_iterator cfs_rq_iterator;
+	struct sched_entity *curr;
+	struct task_struct *p;
 
-	cfs_rq_iterator.start = load_balance_start_fair;
-	cfs_rq_iterator.next = load_balance_next_fair;
-	cfs_rq_iterator.arg = cfs_rq;
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
 
-	return balance_tasks(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &cfs_rq_iterator);
+	p = task_of(curr);
+
+	return p->prio;
 }
+#endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
+	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
-
-	rcu_read_lock();
-	list_for_each_entry(tg, &task_groups, list) {
-		long imbalance;
-		unsigned long this_weight, busiest_weight;
-		long rem_load, max_load, moved_load;
-
-		/*
-		 * empty group
-		 */
-		if (!aggregate(tg, sd)->task_weight)
-			continue;
-
-		rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
-		rem_load /= aggregate(tg, sd)->load + 1;
-
-		this_weight = tg->cfs_rq[this_cpu]->task_weight;
-		busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
+	struct rq_iterator cfs_rq_iterator;
 
-		imbalance = (busiest_weight - this_weight) / 2;
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
 
-		if (imbalance < 0)
-			imbalance = busiest_weight;
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		max_load = max(rem_load, imbalance);
-		moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
-				max_load, sd, idle, all_pinned, this_best_prio,
-				tg->cfs_rq[busiest_cpu]);
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		if (!moved_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		move_group_shares(tg, sd, busiest_cpu, this_cpu);
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		moved_load *= aggregate(tg, sd)->load;
-		moved_load /= aggregate(tg, sd)->rq_weight + 1;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		rem_load_move -= moved_load;
-		if (rem_load_move < 0)
+		if (rem_load_move <= 0)
 			break;
 	}
-	rcu_read_unlock();
 
 	return max_load_move - rem_load_move;
 }
-#else
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_load_move,
-		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
-{
-	return __load_balance_fair(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
-}
-#endif
 
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 060e87b0cb1c..3432d573205d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -513,8 +513,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	 */
 	for_each_sched_rt_entity(rt_se)
 		enqueue_rt_entity(rt_se);
-
-	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -534,8 +532,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 		if (rt_rq && rt_rq->rt_nr_running)
 			enqueue_rt_entity(rt_se);
 	}
-
-	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
-- 
cgit v1.2.3


From ea3f01f8afd3bc5daff915cc4ea5cc5ea9e7d427 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 29 May 2008 14:32:23 +0200
Subject: sched: re-tune NUMA topologies

improve the sysbench ramp-up phase and its peak throughput on
a 16way NUMA box, by turning on WAKE_AFFINE:

             tip/sched   tip/sched+wake-affine
-------------------------------------------------
    1:             700              830    +15.65%
    2:            1465             1391    -5.28%
    4:            3017             3105    +2.81%
    8:            5100             6021    +15.30%
   16:           10725            10745    +0.19%
   32:           10135            10150    +0.16%
   64:            9338             9240    -1.06%
  128:            8599             8252    -4.21%
  256:            8475             8144    -4.07%
-------------------------------------------------
  SUM:           57558            57882    +0.56%

this change also improves lat_ctx from 6.69 usecs to 1.11 usec:

  $ ./lat_ctx -s 0 2
  "size=0k ovr=1.19
  2 1.11

  $ ./lat_ctx -s 0 2
  "size=0k ovr=1.22
  2 6.69

in sysbench it's an overall win with some weakness at the lots-of-clients
side. That happens because we now under-balance this workload
a bit. To counter that effect, turn on NEWIDLE:

              wake-idle          wake-idle+newidle
 -------------------------------------------------
     1:             830              834    +0.43%
     2:            1391             1401    +0.65%
     4:            3105             3091    -0.43%
     8:            6021             6046    +0.42%
    16:           10745            10736    -0.08%
    32:           10150            10206    +0.55%
    64:            9240             9533    +3.08%
   128:            8252             8355    +1.24%
   256:            8144             8384    +2.87%
 -------------------------------------------------
   SUM:           57882            58591    +1.21%

as a bonus this not only improves the many-clients case but
also improves the (more important) rampup phase.

sysbench is a workload that quickly breaks down if the
scheduler over-balances, so since it showed an improvement
under NEWIDLE this change is definitely good.
---
 include/linux/topology.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 4bb7074a2c3a..24f3d2282e11 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -166,7 +166,9 @@ void arch_update_cpu_topology(void);
 	.busy_idx		= 3,			\
 	.idle_idx		= 3,			\
 	.flags			= SD_LOAD_BALANCE	\
-				| SD_SERIALIZE,	\
+				| SD_BALANCE_NEWIDLE	\
+				| SD_WAKE_AFFINE	\
+				| SD_SERIALIZE,		\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 64,			\
 }
-- 
cgit v1.2.3


From 413c239fad68258157f903b3ffd9bfcc53f5e34b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 30 May 2008 10:16:40 +1000
Subject: driver-core: prepare for 2.6.27 api change by adding dev_set_name

Create the dev_set_name function now so that various subsystems can
start changing over to it before other changes in 2.6.27 will make it
compulsory.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 15 +++++++++++++++
 include/linux/device.h |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 72eccae4904b..422cfcad486d 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -759,6 +759,21 @@ static void device_remove_class_symlinks(struct device *dev)
 	sysfs_remove_link(&dev->kobj, "subsystem");
 }
 
+/**
+ * dev_set_name - set a device name
+ * @dev: device
+ */
+int dev_set_name(struct device *dev, const char *fmt, ...)
+{
+	va_list vargs;
+
+	va_start(vargs, fmt);
+	vsnprintf(dev->bus_id, sizeof(dev->bus_id), fmt, vargs);
+	va_end(vargs);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dev_set_name);
+
 /**
  * device_add - add device to device hierarchy.
  * @dev: device.
diff --git a/include/linux/device.h b/include/linux/device.h
index 14616e80213c..6a2d04c011bc 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -385,6 +385,9 @@ static inline const char *dev_name(struct device *dev)
 	return dev->bus_id;
 }
 
+extern int dev_set_name(struct device *dev, const char *name, ...)
+			__attribute__((format(printf, 2, 3)));
+
 #ifdef CONFIG_NUMA
 static inline int dev_to_node(struct device *dev)
 {
-- 
cgit v1.2.3


From 3ef536095446552823fc488fec1c5451aab1260d Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 16 May 2008 11:17:03 +0200
Subject: virtio_blk: allow read-only disks

Hello Rusty,

sometimes it is useful to share a disk (e.g. usr). To avoid file system
corruption, the disk should be mounted read-only in that case. This patch
adds a new feature flag, that allows the host to specify, if the disk should
be considered read-only.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 6 +++++-
 include/linux/virtio_blk.h | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c4804f3465db..dd7ea203f940 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -260,6 +260,10 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
 
+	/* If disk is read-only in the host, the guest should obey */
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
+		set_disk_ro(vblk->disk, 1);
+
 	/* Host must always specify the capacity. */
 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
 			  &cap, sizeof(cap));
@@ -326,7 +330,7 @@ static struct virtio_device_id id_table[] = {
 
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
-	VIRTIO_BLK_F_GEOMETRY,
+	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index d4695a3356d0..b80919fad0ef 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -10,6 +10,7 @@
 #define VIRTIO_BLK_F_SIZE_MAX	1	/* Indicates maximum segment size */
 #define VIRTIO_BLK_F_SEG_MAX	2	/* Indicates maximum # of segments */
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
+#define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 
 struct virtio_blk_config
 {
-- 
cgit v1.2.3


From f7f510ec195781c857ab76366a3e1c59e1caae42 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 30 May 2008 15:09:44 -0500
Subject: virtio: An entropy device, as suggested by hpa.

Note that by itself, having a "hardware" random generator does very
little: you should probably run "rngd" in your guest to feed this into
the kernel entropy pool.

Included:
	virtio_rng: dont use vmalloced addresses for virtio

	If virtio_rng is build as a module, random_data is an address
	in vmalloc space. As virtio expects guest real addresses, this
	can cause any kind of funny behaviour, so lets allocate
	random_data dynamically with kmalloc.

	Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/Kconfig      |   9 +++
 drivers/char/hw_random/Makefile     |   1 +
 drivers/char/hw_random/virtio-rng.c | 155 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio_rng.h          |   8 ++
 4 files changed, 173 insertions(+)
 create mode 100644 drivers/char/hw_random/virtio-rng.c
 create mode 100644 include/linux/virtio_rng.h

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 8d6c2089d2a8..efd0b4db7c8e 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -112,3 +112,12 @@ config HW_RANDOM_PASEMI
 
 	  If unsure, say Y.
 
+config HW_RANDOM_VIRTIO
+	tristate "VirtIO Random Number Generator support"
+	depends on HW_RANDOM && VIRTIO
+	---help---
+	  This driver provides kernel-side support for the virtual Random Number
+	  Generator hardware.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called virtio-rng.  If unsure, say N.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index c8b7300e2fb1..b4940ddbb35f 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o
 obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
 obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
+obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
new file mode 100644
index 000000000000..d0e563e4fc39
--- /dev/null
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -0,0 +1,155 @@
+/*
+ * Randomness driver for virtio
+ *  Copyright (C) 2007, 2008 Rusty Russell IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+#include <linux/err.h>
+#include <linux/hw_random.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/virtio.h>
+#include <linux/virtio_rng.h>
+
+/* The host will fill any buffer we give it with sweet, sweet randomness.  We
+ * give it 64 bytes at a time, and the hwrng framework takes it 4 bytes at a
+ * time. */
+#define RANDOM_DATA_SIZE 64
+
+static struct virtqueue *vq;
+static u32 *random_data;
+static unsigned int data_left;
+static DECLARE_COMPLETION(have_data);
+
+static void random_recv_done(struct virtqueue *vq)
+{
+	int len;
+
+	/* We never get spurious callbacks. */
+	if (!vq->vq_ops->get_buf(vq, &len))
+		BUG();
+
+	data_left = len / sizeof(random_data[0]);
+	complete(&have_data);
+}
+
+static void register_buffer(void)
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, random_data, RANDOM_DATA_SIZE);
+	/* There should always be room for one buffer. */
+	if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
+		BUG();
+	vq->vq_ops->kick(vq);
+}
+
+/* At least we don't udelay() in a loop like some other drivers. */
+static int virtio_data_present(struct hwrng *rng, int wait)
+{
+	if (data_left)
+		return 1;
+
+	if (!wait)
+		return 0;
+
+	wait_for_completion(&have_data);
+	return 1;
+}
+
+/* virtio_data_present() must have succeeded before this is called. */
+static int virtio_data_read(struct hwrng *rng, u32 *data)
+{
+	BUG_ON(!data_left);
+
+	*data = random_data[--data_left];
+
+	if (!data_left) {
+		init_completion(&have_data);
+		register_buffer();
+	}
+	return sizeof(*data);
+}
+
+static struct hwrng virtio_hwrng = {
+	.name = "virtio",
+	.data_present = virtio_data_present,
+	.data_read = virtio_data_read,
+};
+
+static int virtrng_probe(struct virtio_device *vdev)
+{
+	int err;
+
+	/* We expect a single virtqueue. */
+	vq = vdev->config->find_vq(vdev, 0, random_recv_done);
+	if (IS_ERR(vq))
+		return PTR_ERR(vq);
+
+	err = hwrng_register(&virtio_hwrng);
+	if (err) {
+		vdev->config->del_vq(vq);
+		return err;
+	}
+
+	register_buffer();
+	return 0;
+}
+
+static void virtrng_remove(struct virtio_device *vdev)
+{
+	vdev->config->reset(vdev);
+	hwrng_unregister(&virtio_hwrng);
+	vdev->config->del_vq(vq);
+}
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct virtio_driver virtio_rng = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table =	id_table,
+	.probe =	virtrng_probe,
+	.remove =	__devexit_p(virtrng_remove),
+};
+
+static int __init init(void)
+{
+	int err;
+
+	random_data = kmalloc(RANDOM_DATA_SIZE, GFP_KERNEL);
+	if (!random_data)
+		return -ENOMEM;
+
+	err = register_virtio_driver(&virtio_rng);
+	if (err)
+		kfree(random_data);
+	return err;
+}
+
+static void __exit fini(void)
+{
+	kfree(random_data);
+	unregister_virtio_driver(&virtio_rng);
+}
+module_init(init);
+module_exit(fini);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio random number driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
new file mode 100644
index 000000000000..331afb6c9f62
--- /dev/null
+++ b/include/linux/virtio_rng.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_VIRTIO_RNG_H
+#define _LINUX_VIRTIO_RNG_H
+#include <linux/virtio_config.h>
+
+/* The ID for virtio_rng */
+#define VIRTIO_ID_RNG	4
+
+#endif /* _LINUX_VIRTIO_RNG_H */
-- 
cgit v1.2.3


From 7f31fe05000af54e1af81f65a96cab90db8d7ed8 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 29 May 2008 11:08:01 +0200
Subject: virtio_config: fix len calculation of config elements

Rusty,

This patch is a prereq for the virtio_blk blocksize patch, please apply it
first.

Adding an u32 value to the virtio_blk_config unconvered a small bug the config
space defintions:
v is a pointer, to we have to use sizeof(*v) instead of sizeof(v).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 50db245c81ad..71d6c102497e 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -99,7 +99,7 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
  * The return value is -ENOENT if the feature doesn't exist.  Otherwise
  * the config value is copied into whatever is pointed to by v. */
 #define virtio_config_val(vdev, fbit, offset, v) \
-	virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(v))
+	virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(*v))
 
 static inline int virtio_config_buf(struct virtio_device *vdev,
 				    unsigned int fbit,
-- 
cgit v1.2.3


From 7757f09c70af87887dfc195e6d6ddd54f5cc7c39 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 29 May 2008 11:10:01 +0200
Subject: virtio_blk: fix endianess annotations

Since commit 72e61eb40b55dd57031ec5971e810649f82b0259 (virtio: change config
to guest endian) config space is no longer fixed endian.

Lets change the virtio_blk_config variables.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_blk.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index b80919fad0ef..5f79a5f9de79 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -15,14 +15,14 @@
 struct virtio_blk_config
 {
 	/* The capacity (in 512-byte sectors). */
-	__le64 capacity;
+	__u64 capacity;
 	/* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */
-	__le32 size_max;
+	__u32 size_max;
 	/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
-	__le32 seg_max;
+	__u32 seg_max;
 	/* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
 	struct virtio_blk_geometry {
-		__le16 cylinders;
+		__u16 cylinders;
 		__u8 heads;
 		__u8 sectors;
 	} geometry;
-- 
cgit v1.2.3


From b4f68be6c5d507afdcd74f5be3df0b1209cda503 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 30 May 2008 15:09:45 -0500
Subject: virtio: force callback on empty.

virtio allows drivers to suppress callbacks (ie. interrupts) for
efficiency (no locking, it's just an optimization).

There's a similar mechanism for the host to suppress notifications
coming from the guest: in that case, we ignore the suppression if the
ring is completely full.

It turns out that life is simpler if the host similarly ignores
callback suppression when the ring is completely empty: the network
driver wants to free up old packets in a timely manner, and otherwise
has to use a timer to poll.

We have to remove the code which ignores interrupts when the driver
has disabled them (again, it had no locking and hence was unreliable
anyway).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c  | 7 -------
 include/linux/virtio_config.h | 4 ++++
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 96d2567a7df8..72bf8bc09014 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -253,13 +253,6 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 	if (unlikely(vq->broken))
 		return IRQ_HANDLED;
 
-	/* Other side may have missed us turning off the interrupt,
-	 * but we should preserve disable semantic for virtio users. */
-	if (unlikely(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
-		pr_debug("virtqueue interrupt after disable for %p\n", vq);
-		return IRQ_HANDLED;
-	}
-
 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
 	if (vq->vq.callback)
 		vq->vq.callback(&vq->vq);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 71d6c102497e..f364bbf63c34 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -15,6 +15,10 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
+/* Do we get callbacks when the ring is completely used, even if we've
+ * suppressed them? */
+#define VIRTIO_F_NOTIFY_ON_EMPTY	24
+
 #ifdef __KERNEL__
 #include <linux/virtio.h>
 
-- 
cgit v1.2.3


From 5adad0133907790c50283bf03271d920d6897043 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Fri, 30 May 2008 10:40:46 -0400
Subject: Input: rename SW_RADIO to SW_RFKILL_ALL

The SW_RADIO code for EV_SW events has a name that is not descriptive
enough of its intended function, and could induce someone to think
KEY_RADIO is its EV_KEY counterpart, which is false.

Rename it to SW_RFKILL_ALL, and document what this event is for.  Keep
the old name around, to avoid userspace ABI breaks.

The SW_RFKILL_ALL event is meant to be used by rfkill master switches.  It
is not bound to a particular radio switch type, and usually applies to all
types.  It is semantically tied to master rfkill switches that enable or
disable every radio in a system.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 28a094fcfe20..e075c4b762fb 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -637,7 +637,9 @@ struct input_absinfo {
 #define SW_LID			0x00  /* set = lid shut */
 #define SW_TABLET_MODE		0x01  /* set = tablet mode */
 #define SW_HEADPHONE_INSERT	0x02  /* set = inserted */
-#define SW_RADIO		0x03  /* set = radio enabled */
+#define SW_RFKILL_ALL		0x03  /* rfkill master switch, type "any"
+					 set = radio enabled */
+#define SW_RADIO		SW_RFKILL_ALL	/* deprecated */
 #define SW_MAX			0x0f
 #define SW_CNT			(SW_MAX+1)
 
-- 
cgit v1.2.3


From ca05a99a54db1db5bca72eccb5866d2a86f8517f Mon Sep 17 00:00:00 2001
From: "Andrew G. Morgan" <morgan@kernel.org>
Date: Tue, 27 May 2008 22:05:17 -0700
Subject: capabilities: remain source compatible with 32-bit raw legacy
 capability support.

Source code out there hard-codes a notion of what the
_LINUX_CAPABILITY_VERSION #define means in terms of the semantics of the
raw capability system calls capget() and capset().  Its unfortunate, but
true.

Since the confusing header file has been in a released kernel, there is
software that is erroneously using 64-bit capabilities with the semantics
of 32-bit compatibilities.  These recently compiled programs may suffer
corruption of their memory when sys_getcap() overwrites more memory than
they are coded to expect, and the raising of added capabilities when using
sys_capset().

As such, this patch does a number of things to clean up the situation
for all. It

  1. forces the _LINUX_CAPABILITY_VERSION define to always retain its
     legacy value.

  2. adopts a new #define strategy for the kernel's internal
     implementation of the preferred magic.

  3. deprecates v2 capability magic in favor of a new (v3) magic
     number. The functionality of v3 is entirely equivalent to v2,
     the only difference being that the v2 magic causes the kernel
     to log a "deprecated" warning so the admin can find applications
     that may be using v2 inappropriately.

[User space code continues to be encouraged to use the libcap API which
protects the application from details like this.  libcap-2.10 is the first
to support v3 capabilities.]

Fixes issue reported in https://bugzilla.redhat.com/show_bug.cgi?id=447518.
Thanks to Bojan Smojver for the report.

[akpm@linux-foundation.org: s/depreciate/deprecate/g]
[akpm@linux-foundation.org: be robust about put_user size]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Bojan Smojver <bojan@rexursive.com>
Cc: stable@kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
---
 fs/proc/array.c            |   2 +-
 include/linux/capability.h |  29 ++++++++----
 kernel/capability.c        | 111 +++++++++++++++++++++++++++++----------------
 3 files changed, 95 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9e3b8c33c24b..797d775e0354 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header,
 	seq_printf(m, "%s", header);
 	CAP_FOR_EACH_U32(__capi) {
 		seq_printf(m, "%08x",
-			   a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+			   a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
 	}
 	seq_printf(m, "\n");
 }
diff --git a/include/linux/capability.h b/include/linux/capability.h
index f4ea0dd9a618..fa830f8de032 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -31,11 +31,11 @@ struct task_struct;
 #define _LINUX_CAPABILITY_VERSION_1  0x19980330
 #define _LINUX_CAPABILITY_U32S_1     1
 
-#define _LINUX_CAPABILITY_VERSION_2  0x20071026
+#define _LINUX_CAPABILITY_VERSION_2  0x20071026  /* deprecated - use v3 */
 #define _LINUX_CAPABILITY_U32S_2     2
 
-#define _LINUX_CAPABILITY_VERSION    _LINUX_CAPABILITY_VERSION_2
-#define _LINUX_CAPABILITY_U32S       _LINUX_CAPABILITY_U32S_2
+#define _LINUX_CAPABILITY_VERSION_3  0x20080522
+#define _LINUX_CAPABILITY_U32S_3     2
 
 typedef struct __user_cap_header_struct {
 	__u32 version;
@@ -77,10 +77,23 @@ struct vfs_cap_data {
 	} data[VFS_CAP_U32];
 };
 
-#ifdef __KERNEL__
+#ifndef __KERNEL__
+
+/*
+ * Backwardly compatible definition for source code - trapped in a
+ * 32-bit world. If you find you need this, please consider using
+ * libcap to untrap yourself...
+ */
+#define _LINUX_CAPABILITY_VERSION  _LINUX_CAPABILITY_VERSION_1
+#define _LINUX_CAPABILITY_U32S     _LINUX_CAPABILITY_U32S_1
+
+#else
+
+#define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
+#define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
 
 typedef struct kernel_cap_struct {
-	__u32 cap[_LINUX_CAPABILITY_U32S];
+	__u32 cap[_KERNEL_CAPABILITY_U32S];
 } kernel_cap_t;
 
 #define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
@@ -351,7 +364,7 @@ typedef struct kernel_cap_struct {
  */
 
 #define CAP_FOR_EACH_U32(__capi)  \
-	for (__capi = 0; __capi < _LINUX_CAPABILITY_U32S; ++__capi)
+	for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi)
 
 # define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)		\
 			    | CAP_TO_MASK(CAP_DAC_OVERRIDE)	\
@@ -361,7 +374,7 @@ typedef struct kernel_cap_struct {
 
 # define CAP_FS_MASK_B1     (CAP_TO_MASK(CAP_MAC_OVERRIDE))
 
-#if _LINUX_CAPABILITY_U32S != 2
+#if _KERNEL_CAPABILITY_U32S != 2
 # error Fix up hand-coded capability macro initializers
 #else /* HAND-CODED capability initializers */
 
@@ -372,7 +385,7 @@ typedef struct kernel_cap_struct {
 # define CAP_NFSD_SET     ((kernel_cap_t){{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \
 					CAP_FS_MASK_B1 } })
 
-#endif /* _LINUX_CAPABILITY_U32S != 2 */
+#endif /* _KERNEL_CAPABILITY_U32S != 2 */
 
 #define CAP_INIT_INH_SET    CAP_EMPTY_SET
 
diff --git a/kernel/capability.c b/kernel/capability.c
index 39e8193b41ea..cfbe44299488 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -52,6 +52,69 @@ static void warn_legacy_capability_use(void)
 	}
 }
 
+/*
+ * Version 2 capabilities worked fine, but the linux/capability.h file
+ * that accompanied their introduction encouraged their use without
+ * the necessary user-space source code changes. As such, we have
+ * created a version 3 with equivalent functionality to version 2, but
+ * with a header change to protect legacy source code from using
+ * version 2 when it wanted to use version 1. If your system has code
+ * that trips the following warning, it is using version 2 specific
+ * capabilities and may be doing so insecurely.
+ *
+ * The remedy is to either upgrade your version of libcap (to 2.10+,
+ * if the application is linked against it), or recompile your
+ * application with modern kernel headers and this warning will go
+ * away.
+ */
+
+static void warn_deprecated_v2(void)
+{
+	static int warned;
+
+	if (!warned) {
+		char name[sizeof(current->comm)];
+
+		printk(KERN_INFO "warning: `%s' uses deprecated v2"
+		       " capabilities in a way that may be insecure.\n",
+		       get_task_comm(name, current));
+		warned = 1;
+	}
+}
+
+/*
+ * Version check. Return the number of u32s in each capability flag
+ * array, or a negative value on error.
+ */
+static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
+{
+	__u32 version;
+
+	if (get_user(version, &header->version))
+		return -EFAULT;
+
+	switch (version) {
+	case _LINUX_CAPABILITY_VERSION_1:
+		warn_legacy_capability_use();
+		*tocopy = _LINUX_CAPABILITY_U32S_1;
+		break;
+	case _LINUX_CAPABILITY_VERSION_2:
+		warn_deprecated_v2();
+		/*
+		 * fall through - v3 is otherwise equivalent to v2.
+		 */
+	case _LINUX_CAPABILITY_VERSION_3:
+		*tocopy = _LINUX_CAPABILITY_U32S_3;
+		break;
+	default:
+		if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version))
+			return -EFAULT;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /*
  * For sys_getproccap() and sys_setproccap(), any of the three
  * capability set pointers may be NULL -- indicating that that set is
@@ -71,27 +134,13 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
 {
 	int ret = 0;
 	pid_t pid;
-	__u32 version;
 	struct task_struct *target;
 	unsigned tocopy;
 	kernel_cap_t pE, pI, pP;
 
-	if (get_user(version, &header->version))
-		return -EFAULT;
-
-	switch (version) {
-	case _LINUX_CAPABILITY_VERSION_1:
-		warn_legacy_capability_use();
-		tocopy = _LINUX_CAPABILITY_U32S_1;
-		break;
-	case _LINUX_CAPABILITY_VERSION_2:
-		tocopy = _LINUX_CAPABILITY_U32S_2;
-		break;
-	default:
-		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
-			return -EFAULT;
-		return -EINVAL;
-	}
+	ret = cap_validate_magic(header, &tocopy);
+	if (ret != 0)
+		return ret;
 
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
@@ -118,7 +167,7 @@ out:
 	spin_unlock(&task_capability_lock);
 
 	if (!ret) {
-		struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
+		struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 		unsigned i;
 
 		for (i = 0; i < tocopy; i++) {
@@ -128,7 +177,7 @@ out:
 		}
 
 		/*
-		 * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S,
+		 * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
 		 * we silently drop the upper capabilities here. This
 		 * has the effect of making older libcap
 		 * implementations implicitly drop upper capability
@@ -240,30 +289,16 @@ static inline int cap_set_all(kernel_cap_t *effective,
  */
 asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 {
-	struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S];
+	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
-	__u32 version;
 	struct task_struct *target;
 	int ret;
 	pid_t pid;
 
-	if (get_user(version, &header->version))
-		return -EFAULT;
-
-	switch (version) {
-	case _LINUX_CAPABILITY_VERSION_1:
-		warn_legacy_capability_use();
-		tocopy = _LINUX_CAPABILITY_U32S_1;
-		break;
-	case _LINUX_CAPABILITY_VERSION_2:
-		tocopy = _LINUX_CAPABILITY_U32S_2;
-		break;
-	default:
-		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
-			return -EFAULT;
-		return -EINVAL;
-	}
+	ret = cap_validate_magic(header, &tocopy);
+	if (ret != 0)
+		return ret;
 
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
@@ -281,7 +316,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 		permitted.cap[i] = kdata[i].permitted;
 		inheritable.cap[i] = kdata[i].inheritable;
 	}
-	while (i < _LINUX_CAPABILITY_U32S) {
+	while (i < _KERNEL_CAPABILITY_U32S) {
 		effective.cap[i] = 0;
 		permitted.cap[i] = 0;
 		inheritable.cap[i] = 0;
-- 
cgit v1.2.3


From 63e14626eddb534ab429e9c2b95d3f7038b596b6 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Sun, 1 Jun 2008 11:49:32 +0200
Subject: mmc_spi: mmc_spi.h should include linux/interrupts.h

Since mmc_spi.h uses irqreturn_t type, it should include appropriate
header, otherwise build will break if users didn't include it (some of
them do not use interrupts).

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/mmc_spi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h
index e9bbe3ebd721..d5ca78b93a3b 100644
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_SPI_MMC_SPI_H
 #define __LINUX_SPI_MMC_SPI_H
 
+#include <linux/interrupt.h>
+
 struct device;
 struct mmc_host;
 
-- 
cgit v1.2.3


From 64e9159f5d2c4edf5fa6425031e556f8fddaf7e6 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Tue, 3 Jun 2008 15:18:54 +0100
Subject: serial_core: uart_set_ldisc infrastructure

The tty layer provides a callback that is used when the line discipline
is changed. Some hardware uses this to configure hardware specific
features such as IrDA mode on serial ports. Unfortunately the serial
layer does not provide this feature or pass it down to drivers.

Blackfin used to hack around this by rewriting the tty ops, but those are
now properly shared and const so the hack fails. Instead provide the
proper operations.

This change plus a follow up from the Blackfin guys is needed to avoid
blackfin losing features in this release.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/serial_core.c | 10 ++++++++++
 include/linux/serial_core.h  |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 53b03c629aff..951a75ea6e3e 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1165,6 +1165,15 @@ out:
 	return ret;
 }
 
+static void uart_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+	struct uart_state *state = tty->driver_data;
+	struct uart_port *port = state->port;
+
+	if (port->ops->set_ldisc)
+		port->ops->set_ldisc(port);
+}
+
 static void uart_set_termios(struct tty_struct *tty,
 						struct ktermios *old_termios)
 {
@@ -2288,6 +2297,7 @@ static const struct tty_operations uart_ops = {
 	.unthrottle	= uart_unthrottle,
 	.send_xchar	= uart_send_xchar,
 	.set_termios	= uart_set_termios,
+	.set_ldisc	= uart_set_ldisc,
 	.stop		= uart_stop,
 	.start		= uart_start,
 	.hangup		= uart_hangup,
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d32123ae08ad..d8f31de632c5 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -192,6 +192,7 @@ struct uart_ops {
 	void		(*shutdown)(struct uart_port *);
 	void		(*set_termios)(struct uart_port *, struct ktermios *new,
 				       struct ktermios *old);
+	void		(*set_ldisc)(struct uart_port *);
 	void		(*pm)(struct uart_port *, unsigned int state,
 			      unsigned int oldstate);
 	int		(*set_wake)(struct uart_port *, unsigned int state);
-- 
cgit v1.2.3


From 51b77cae0d5aa8e1546fca855dcfe48ddfadfa9c Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:36:01 -0700
Subject: route: Mark unused route cache flags as such.

Also removes an obsolete check for the unused flag RTCF_MASQ.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in_route.h | 12 ++++++------
 net/ipv4/route.c         |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in_route.h b/include/linux/in_route.h
index 61f25c30a2a0..b261b8c915f0 100644
--- a/include/linux/in_route.h
+++ b/include/linux/in_route.h
@@ -10,19 +10,19 @@
 #define RTCF_NOPMTUDISC RTM_F_NOPMTUDISC
 
 #define RTCF_NOTIFY	0x00010000
-#define RTCF_DIRECTDST	0x00020000
+#define RTCF_DIRECTDST	0x00020000 /* unused */
 #define RTCF_REDIRECTED	0x00040000
-#define RTCF_TPROXY	0x00080000
+#define RTCF_TPROXY	0x00080000 /* unused */
 
-#define RTCF_FAST	0x00200000
-#define RTCF_MASQ	0x00400000
-#define RTCF_SNAT	0x00800000
+#define RTCF_FAST	0x00200000 /* unused */
+#define RTCF_MASQ	0x00400000 /* unused */
+#define RTCF_SNAT	0x00800000 /* unused */
 #define RTCF_DOREDIRECT 0x01000000
 #define RTCF_DIRECTSRC	0x04000000
 #define RTCF_DNAT	0x08000000
 #define RTCF_BROADCAST	0x10000000
 #define RTCF_MULTICAST	0x20000000
-#define RTCF_REJECT	0x40000000
+#define RTCF_REJECT	0x40000000 /* unused */
 #define RTCF_LOCAL	0x80000000
 
 #define RTCF_NAT	(RTCF_DNAT|RTCF_SNAT)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df41026b60db..96be336064fb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
+	if (out_dev == in_dev && err &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
-- 
cgit v1.2.3


From 1f9d11c7c99da706e33646c3a9080dd5a8ef9a0b Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:36:27 -0700
Subject: route: Mark unused routing attributes as such

Also removes an unused policy entry for an attribute which is
only used in kernel->user direction.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 4 ++--
 net/ipv4/fib_frontend.c   | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 44c81c744538..a2aec2c0cfb5 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -267,10 +267,10 @@ enum rtattr_type_t
 	RTA_PREFSRC,
 	RTA_METRICS,
 	RTA_MULTIPATH,
-	RTA_PROTOINFO,
+	RTA_PROTOINFO, /* no longer used */
 	RTA_FLOW,
 	RTA_CACHEINFO,
-	RTA_SESSION,
+	RTA_SESSION, /* no longer used */
 	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
 	__RTA_MAX
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7a..0b2ac6a3d903 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_PREFSRC]		= { .type = NLA_U32 },
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
-	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
 };
 
-- 
cgit v1.2.3


From ab32cd793dca21eec846a8204390d9594ed994d5 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:37:33 -0700
Subject: route: Remove unused ifa_anycast field

The field was supposed to allow the creation of an anycast route by
assigning an anycast address to an address prefix. It was never
implemented so this field is unused and serves no purpose. Remove it.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 1 -
 net/ipv4/devinet.c         | 9 ---------
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 7009b0cdd06f..c6f51ad52d5b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -117,7 +117,6 @@ struct in_ifaddr
 	__be32			ifa_address;
 	__be32			ifa_mask;
 	__be32			ifa_broadcast;
-	__be32			ifa_anycast;
 	unsigned char		ifa_scope;
 	unsigned char		ifa_flags;
 	unsigned char		ifa_prefixlen;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f34..79a7ef6209ff 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
-	[IFA_ANYCAST]   	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_BROADCAST])
 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 
-	if (tb[IFA_ANYCAST])
-		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
-
 	if (tb[IFA_LABEL])
 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 				break;
 			inet_del_ifa(in_dev, ifap, 0);
 			ifa->ifa_broadcast = 0;
-			ifa->ifa_anycast = 0;
 			ifa->ifa_scope = 0;
 		}
 
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(4) /* IFA_ANYCAST */
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	if (ifa->ifa_broadcast)
 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
 
-	if (ifa->ifa_anycast)
-		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
-
 	if (ifa->ifa_label[0])
 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
 
-- 
cgit v1.2.3


From 4f0ebe3cc57f18ba26317b56b80b108c2848b1de Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 20 May 2008 02:17:50 +0900
Subject: libata: kill unused constants

Kill a few unused constants.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4a92fbafce9d..93e2b89d0c57 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -111,13 +111,10 @@ enum {
 	/* various global constants */
 	LIBATA_MAX_PRD		= ATA_MAX_PRD / 2,
 	LIBATA_DUMB_MAX_PRD	= ATA_MAX_PRD / 4,	/* Worst case */
-	ATA_MAX_PORTS		= 8,
 	ATA_DEF_QUEUE		= 1,
 	/* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */
 	ATA_MAX_QUEUE		= 32,
 	ATA_TAG_INTERNAL	= ATA_MAX_QUEUE - 1,
-	ATA_MAX_BUS		= 2,
-	ATA_DEF_BUSY_WAIT	= 10000,
 	ATA_SHORT_PAUSE		= (HZ >> 6) + 1,
 
 	ATAPI_MAX_DRAIN		= 16 << 10,
-- 
cgit v1.2.3


From a57c1bade5a0ee5cd8b74502db9cbebb7f5780b2 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 29 May 2008 22:10:58 +0100
Subject: libata-sff: Fix oops reported in kerneloops.org for pnp devices with
 no ctl

- Make ata_sff_altstatus private so nobody uses it by mistake
- Drop the 400nS delay from it

Add

ata_sff_irq_status	-	encapsulates the IRQ check logic

This function keeps the existing behaviour for altstatus using devices. I
actually suspect the logic was wrong before the changes but -rc isn't the
time to play with that

ata_sff_sync		-	ensure writes hit the device

Really we want an io* operation for 'is posted' eg ioisposted(ioaddr) so
that we can fix the nasty delay this causes on most systems.

- ata_sff_pause		-	400nS delay

Ensure the command hit the device and delay 400nS

- ata_sff_dma_pause

Ensure the I/O hit the device and enforce an HDMA1:0 transition delay.
Requires altstatus register exists, BUG if not so we don't risk
corruption in MWDMA modes. (UDMA the checksum will save your backside in
theory)

The only other complication then is devices with their own handlers.
rb532 can use dma_pause but scc needs to access its own altstatus
register for internal errata workarounds so directly call the drivers own
altstatus function.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c    | 115 ++++++++++++++++++++++++++++++++++++++------
 drivers/ata/pata_icside.c   |   2 +-
 drivers/ata/pata_rb532_cf.c |   4 +-
 drivers/ata/pata_scc.c      |   5 +-
 include/linux/libata.h      |  16 +-----
 5 files changed, 109 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 3c2d2289f85e..90d20c615ef5 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -247,7 +247,7 @@ u8 ata_sff_check_status(struct ata_port *ap)
  *	LOCKING:
  *	Inherited from caller.
  */
-u8 ata_sff_altstatus(struct ata_port *ap)
+static u8 ata_sff_altstatus(struct ata_port *ap)
 {
 	if (ap->ops->sff_check_altstatus)
 		return ap->ops->sff_check_altstatus(ap);
@@ -255,6 +255,93 @@ u8 ata_sff_altstatus(struct ata_port *ap)
 	return ioread8(ap->ioaddr.altstatus_addr);
 }
 
+/**
+ *	ata_sff_irq_status - Check if the device is busy
+ *	@ap: port where the device is
+ *
+ *	Determine if the port is currently busy. Uses altstatus
+ *	if available in order to avoid clearing shared IRQ status
+ *	when finding an IRQ source. Non ctl capable devices don't
+ *	share interrupt lines fortunately for us.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+static u8 ata_sff_irq_status(struct ata_port *ap)
+{
+	u8 status;
+
+	if (ap->ops->sff_check_altstatus || ap->ioaddr.altstatus_addr) {
+		status = ata_sff_altstatus(ap);
+		/* Not us: We are busy */
+		if (status & ATA_BUSY)
+		    	return status;
+	}
+	/* Clear INTRQ latch */
+	status = ata_sff_check_status(ap);
+	return status;
+}
+
+/**
+ *	ata_sff_sync - Flush writes
+ *	@ap: Port to wait for.
+ *
+ *	CAUTION:
+ *	If we have an mmio device with no ctl and no altstatus
+ *	method this will fail. No such devices are known to exist.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
+static void ata_sff_sync(struct ata_port *ap)
+{
+	if (ap->ops->sff_check_altstatus)
+		ap->ops->sff_check_altstatus(ap);
+	else if (ap->ioaddr.altstatus_addr)
+		ioread8(ap->ioaddr.altstatus_addr);
+}
+
+/**
+ *	ata_sff_pause		-	Flush writes and wait 400nS
+ *	@ap: Port to pause for.
+ *
+ *	CAUTION:
+ *	If we have an mmio device with no ctl and no altstatus
+ *	method this will fail. No such devices are known to exist.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
+void ata_sff_pause(struct ata_port *ap)
+{
+	ata_sff_sync(ap);
+	ndelay(400);
+}
+
+/**
+ *	ata_sff_dma_pause	-	Pause before commencing DMA
+ *	@ap: Port to pause for.
+ *
+ *	Perform I/O fencing and ensure sufficient cycle delays occur
+ *	for the HDMA1:0 transition
+ */
+ 
+void ata_sff_dma_pause(struct ata_port *ap)
+{
+	if (ap->ops->sff_check_altstatus || ap->ioaddr.altstatus_addr) {
+		/* An altstatus read will cause the needed delay without
+		   messing up the IRQ status */
+		ata_sff_altstatus(ap);
+		return;
+	}
+	/* There are no DMA controllers without ctl. BUG here to ensure
+	   we never violate the HDMA1:0 transition timing and risk
+	   corruption. */
+	BUG();
+}
+
 /**
  *	ata_sff_busy_sleep - sleep until BSY clears, or timeout
  *	@ap: port containing status register to be polled
@@ -742,7 +829,7 @@ static void ata_pio_sectors(struct ata_queued_cmd *qc)
 	} else
 		ata_pio_sector(qc);
 
-	ata_sff_altstatus(qc->ap); /* flush */
+	ata_sff_sync(qc->ap); /* flush */
 }
 
 /**
@@ -763,8 +850,9 @@ static void atapi_send_cdb(struct ata_port *ap, struct ata_queued_cmd *qc)
 	WARN_ON(qc->dev->cdb_len < 12);
 
 	ap->ops->sff_data_xfer(qc->dev, qc->cdb, qc->dev->cdb_len, 1);
-	ata_sff_altstatus(ap); /* flush */
-
+	ata_sff_sync(ap);
+	/* FIXME: If the CDB is for DMA do we need to do the transition delay
+	   or is bmdma_start guaranteed to do it ? */
 	switch (qc->tf.protocol) {
 	case ATAPI_PROT_PIO:
 		ap->hsm_task_state = HSM_ST;
@@ -905,7 +993,7 @@ static void atapi_pio_bytes(struct ata_queued_cmd *qc)
 
 	if (unlikely(__atapi_pio_bytes(qc, bytes)))
 		goto err_out;
-	ata_sff_altstatus(ap); /* flush */
+	ata_sff_sync(ap); /* flush */
 
 	return;
 
@@ -1489,14 +1577,10 @@ inline unsigned int ata_sff_host_intr(struct ata_port *ap,
 		goto idle_irq;
 	}
 
-	/* check altstatus */
-	status = ata_sff_altstatus(ap);
-	if (status & ATA_BUSY)
-		goto idle_irq;
 
-	/* check main status, clearing INTRQ */
-	status = ap->ops->sff_check_status(ap);
-	if (unlikely(status & ATA_BUSY))
+	/* check main status, clearing INTRQ if needed */
+	status = ata_sff_irq_status(ap);
+	if (status & ATA_BUSY)
 		goto idle_irq;
 
 	/* ack bmdma irq events */
@@ -2030,7 +2114,7 @@ void ata_sff_error_handler(struct ata_port *ap)
 		ap->ops->bmdma_stop(qc);
 	}
 
-	ata_sff_altstatus(ap);
+	ata_sff_sync(ap);		/* FIXME: We don't need this */
 	ap->ops->sff_check_status(ap);
 	ap->ops->sff_irq_clear(ap);
 
@@ -2203,7 +2287,7 @@ void ata_bmdma_stop(struct ata_queued_cmd *qc)
 		 mmio + ATA_DMA_CMD);
 
 	/* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
-	ata_sff_altstatus(ap);        /* dummy read */
+	ata_sff_dma_pause(ap);
 }
 
 /**
@@ -2722,7 +2806,8 @@ EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
 EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
 EXPORT_SYMBOL_GPL(ata_sff_dev_select);
 EXPORT_SYMBOL_GPL(ata_sff_check_status);
-EXPORT_SYMBOL_GPL(ata_sff_altstatus);
+EXPORT_SYMBOL_GPL(ata_sff_dma_pause);
+EXPORT_SYMBOL_GPL(ata_sff_pause);
 EXPORT_SYMBOL_GPL(ata_sff_busy_sleep);
 EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
 EXPORT_SYMBOL_GPL(ata_sff_tf_load);
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index 17138436423d..cf9e9848f8b5 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -270,7 +270,7 @@ static void pata_icside_bmdma_stop(struct ata_queued_cmd *qc)
 	disable_dma(state->dma);
 
 	/* see ata_bmdma_stop */
-	ata_sff_altstatus(ap);
+	ata_sff_dma_pause(ap);
 }
 
 static u8 pata_icside_bmdma_status(struct ata_port *ap)
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index a108d259f19d..f8b3ffc8ae9e 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -57,7 +57,9 @@ static inline void rb532_pata_finish_io(struct ata_port *ap)
 	struct ata_host *ah = ap->host;
 	struct rb532_cf_info *info = ah->private_data;
 
-	ata_sff_altstatus(ap);
+	/* FIXME: Keep previous delay. If this is merely a fence then
+	   ata_sff_sync might be sufficient. */
+	ata_sff_dma_pause(ap);
 	ndelay(RB500_CF_IO_DELAY);
 
 	set_irq_type(info->irq, IRQ_TYPE_LEVEL_HIGH);
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index e965b251ca24..bbf5aa345e68 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -726,7 +726,7 @@ static void scc_bmdma_stop (struct ata_queued_cmd *qc)
 		 in_be32(bmid_base + SCC_DMA_CMD) & ~ATA_DMA_START);
 
 	/* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
-	ata_sff_altstatus(ap);	/* dummy read */
+	ata_sff_dma_pause(ap);	/* dummy read */
 }
 
 /**
@@ -747,7 +747,8 @@ static u8 scc_bmdma_status (struct ata_port *ap)
 		return host_stat;
 
 	/* errata A252,A308 workaround: Step4 */
-	if ((ata_sff_altstatus(ap) & ATA_ERR) && (int_status & INTSTS_INTRQ))
+	if ((scc_check_altstatus(ap) & ATA_ERR)
+					&& (int_status & INTSTS_INTRQ))
 		return (host_stat | ATA_DMA_INTR);
 
 	/* errata A308 workaround Step5 */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 93e2b89d0c57..e57e5d08312d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1432,7 +1432,8 @@ extern void ata_sff_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_sff_dumb_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_sff_dev_select(struct ata_port *ap, unsigned int device);
 extern u8 ata_sff_check_status(struct ata_port *ap);
-extern u8 ata_sff_altstatus(struct ata_port *ap);
+extern void ata_sff_pause(struct ata_port *ap);
+extern void ata_sff_dma_pause(struct ata_port *ap);
 extern int ata_sff_busy_sleep(struct ata_port *ap,
 			      unsigned long timeout_pat, unsigned long timeout);
 extern int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline);
@@ -1492,19 +1493,6 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 				struct scsi_host_template *sht, void *host_priv);
 #endif /* CONFIG_PCI */
 
-/**
- *	ata_sff_pause - Flush writes and pause 400 nanoseconds.
- *	@ap: Port to wait for.
- *
- *	LOCKING:
- *	Inherited from caller.
- */
-static inline void ata_sff_pause(struct ata_port *ap)
-{
-	ata_sff_altstatus(ap);
-	ndelay(400);
-}
-
 /**
  *	ata_sff_busy_wait - Wait for a port status register
  *	@ap: Port to wait for.
-- 
cgit v1.2.3


From 39028ec69b13712ec1dcd9aa14844bf60f19cb20 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 2 Jun 2008 15:46:51 -0300
Subject: V4L/DVB (7166): [v4l] Add new user class controls and deprecate
 others

These were removed in commit 26d507fcfef7f7d0cd2eec874a87169cc121c835:

> -#define V4L2_CID_HCENTER               (V4L2_CID_BASE+22)
> -#define V4L2_CID_VCENTER               (V4L2_CID_BASE+23)
> -#define V4L2_CID_LASTP1                        (V4L2_CID_BASE+24) /*
> last CID + 1 */
> +
> +/* Deprecated, use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */
> +#define V4L2_CID_HCENTER_DEPRECATED    (V4L2_CID_BASE+22)
> +#define V4L2_CID_VCENTER_DEPRECATED    (V4L2_CID_BASE+23)

But there was no warning in Documentation/feature-removal-schedule.txt
and I'm receiving reports that it's breaking userspace apps (the
gstreamer-v4l2 plugin breaks in Fedora rawhide). You can't just pull
things from the published userspace API like that.

Please can we revert the addition of _DEPRECATED to these ioctl
definitions. Perhaps we can add a runtime warning if they actually get
used? Or a compile-time warning if we can manage that?

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index c1411189ba6c..4a535ea1e123 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -865,9 +865,9 @@ struct v4l2_querymenu
 #define V4L2_CID_HFLIP			(V4L2_CID_BASE+20)
 #define V4L2_CID_VFLIP			(V4L2_CID_BASE+21)
 
-/* Deprecated, use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */
-#define V4L2_CID_HCENTER_DEPRECATED	(V4L2_CID_BASE+22)
-#define V4L2_CID_VCENTER_DEPRECATED	(V4L2_CID_BASE+23)
+/* Deprecated; use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */
+#define V4L2_CID_HCENTER		(V4L2_CID_BASE+22)
+#define V4L2_CID_VCENTER		(V4L2_CID_BASE+23)
 
 #define V4L2_CID_POWER_LINE_FREQUENCY	(V4L2_CID_BASE+24)
 enum v4l2_power_line_frequency {
-- 
cgit v1.2.3


From 2f5997140f22f68f6390c49941150d3fa8a95cb7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 May 2008 12:10:20 -0300
Subject: KVM: migrate PIT timer

Migrate the PIT timer to the physical CPU which vcpu0 is scheduled on,
similarly to what is done for the LAPIC timers, otherwise PIT interrupts
will be delayed until an unrelated event causes an exit.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c     | 14 +++++++++++++-
 arch/x86/kvm/irq.c       |  6 ++++++
 arch/x86/kvm/irq.h       |  2 ++
 arch/x86/kvm/svm.c       |  2 +-
 arch/x86/kvm/vmx.c       |  2 +-
 arch/x86/kvm/x86.c       |  2 +-
 include/linux/kvm_host.h |  2 +-
 7 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 7c077a9d9777..f2f5d260874e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,7 +200,6 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	atomic_inc(&pt->pending);
 	smp_mb__after_atomic_inc();
-	/* FIXME: handle case where the guest is in guest mode */
 	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
 		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(&vcpu0->wq);
@@ -237,6 +236,19 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 		return HRTIMER_NORESTART;
 }
 
+void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+	struct hrtimer *timer;
+
+	if (vcpu->vcpu_id != 0 || !pit)
+		return;
+
+	timer = &pit->pit_state.pit_timer.timer;
+	if (hrtimer_cancel(timer))
+		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+}
+
 static void destroy_pit_timer(struct kvm_kpit_timer *pt)
 {
 	pr_debug("pit: execute del timer!\n");
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index ce1f583459b1..76d736b5f664 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -94,3 +94,9 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
+
+void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
+{
+	__kvm_migrate_apic_timer(vcpu);
+	__kvm_migrate_pit_timer(vcpu);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 1802134b836f..2a15be2275c0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -84,6 +84,8 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
+void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 int pit_has_pending_timer(struct kvm_vcpu *vcpu);
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ab22615eee89..6b0d5fa5bab3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -688,7 +688,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
-		kvm_migrate_apic_timer(vcpu);
+		kvm_migrate_timers(vcpu);
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe4db11989c..96445f341519 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -608,7 +608,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
-		kvm_migrate_apic_timer(vcpu);
+		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21338bdb28ff..00acf1301a15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2758,7 +2758,7 @@ again:
 
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
-			__kvm_migrate_apic_timer(vcpu);
+			__kvm_migrate_timers(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
 				       &vcpu->requests)) {
 			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 398978972b7a..092b1b25291d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -297,7 +297,7 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
-static inline void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
 }
-- 
cgit v1.2.3


From 44d1b980c72db0faf35adb082fb2208351803028 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Thu, 5 Jun 2008 22:46:18 -0700
Subject: Fix various old email addresses for dwmw2

Although if people have questions about ARCnet, perhaps it's _better_
for them to be mailing dwmw2@cam.ac.uk about it...

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/networking/arcnet.txt   | 2 +-
 arch/frv/kernel/cmode.S               | 2 +-
 arch/frv/kernel/sleep.S               | 2 +-
 arch/frv/mb93090-mb00/pci-dma-nommu.c | 2 +-
 drivers/mtd/redboot.c                 | 2 +-
 fs/afs/callback.c                     | 2 +-
 fs/afs/inode.c                        | 2 +-
 fs/afs/super.c                        | 2 +-
 include/linux/mtd/nand.h              | 2 +-
 include/linux/tty.h                   | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/arcnet.txt b/Documentation/networking/arcnet.txt
index 770fc41a78e8..796012540386 100644
--- a/Documentation/networking/arcnet.txt
+++ b/Documentation/networking/arcnet.txt
@@ -46,7 +46,7 @@ These are the ARCnet drivers for Linux.
 
 
 This new release (2.91) has been put together by David Woodhouse 
-<dwmw2@cam.ac.uk>, in an attempt to tidy up the driver after adding support 
+<dwmw2@infradead.org>, in an attempt to tidy up the driver after adding support
 for yet another chipset. Now the generic support has been separated from the
 individual chipset drivers, and the source files aren't quite so packed with
 #ifdefs! I've changed this file a bit, but kept it in the first person from
diff --git a/arch/frv/kernel/cmode.S b/arch/frv/kernel/cmode.S
index 81ba28ad2207..53deeb5d7e87 100644
--- a/arch/frv/kernel/cmode.S
+++ b/arch/frv/kernel/cmode.S
@@ -1,7 +1,7 @@
 /* cmode.S: clock mode management
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Woodhouse (dwmw2@redhat.com)
+ * Written by David Woodhouse (dwmw2@infradead.org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/arch/frv/kernel/sleep.S b/arch/frv/kernel/sleep.S
index c9b2d51ab9ad..f67bf73cd2cc 100644
--- a/arch/frv/kernel/sleep.S
+++ b/arch/frv/kernel/sleep.S
@@ -1,7 +1,7 @@
 /* sleep.S: power saving mode entry
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Woodhouse (dwmw2@redhat.com)
+ * Written by David Woodhouse (dwmw2@infradead.org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c
index 4985466b1a7c..64ee58d748be 100644
--- a/arch/frv/mb93090-mb00/pci-dma-nommu.c
+++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c
@@ -1,7 +1,7 @@
 /* pci-dma-nommu.c: Dynamic DMA mapping support for the FRV
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Woodhouse (dwmw2@redhat.com)
+ * Written by David Woodhouse (dwmw2@infradead.org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index 47474903263c..c5030f94f04e 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -295,5 +295,5 @@ module_init(redboot_parser_init);
 module_exit(redboot_parser_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Red Hat, Inc. - David Woodhouse <dwmw2@cambridge.redhat.com>");
+MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
 MODULE_DESCRIPTION("Parsing code for RedBoot Flash Image System (FIS) tables");
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index a78d5b236bb1..587ef5123cd8 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -8,7 +8,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>
  *          David Howells <dhowells@redhat.com>
  *
  */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 08db82e1343a..bb47217f6a18 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -8,7 +8,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>
  *          David Howells <dhowells@redhat.com>
  *
  */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 4b572b801d8d..7e3faeef6818 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -10,7 +10,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * Authors: David Howells <dhowells@redhat.com>
- *          David Woodhouse <dwmw2@redhat.com>
+ *          David Woodhouse <dwmw2@infradead.org>
  *
  */
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c42bc7f533a5..53ea3dc8b0e8 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -1,7 +1,7 @@
 /*
  *  linux/include/linux/mtd/nand.h
  *
- *  Copyright (c) 2000 David Woodhouse <dwmw2@mvhi.com>
+ *  Copyright (c) 2000 David Woodhouse <dwmw2@infradead.org>
  *                     Steven J. Hill <sjhill@realitydiluted.com>
  *		       Thomas Gleixner <tglx@linutronix.de>
  *
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 7f7121f9c968..324a3b231d40 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -36,7 +36,7 @@
 #define N_6PACK		7
 #define N_MASC		8	/* Reserved for Mobitex module <kaz@cafe.net> */
 #define N_R3964		9	/* Reserved for Simatic R3964 module */
-#define N_PROFIBUS_FDL	10	/* Reserved for Profibus <Dave@mvhi.com> */
+#define N_PROFIBUS_FDL	10	/* Reserved for Profibus */
 #define N_IRDA		11	/* Linux IrDa - http://irda.sourceforge.net/ */
 #define N_SMSBLOCK	12	/* SMS block mode - for talking to GSM data */
 				/* cards about SMS messages */
-- 
cgit v1.2.3


From 3527fb326f07bc8e85cf66d4f987ebeea24e8e4a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 5 Jun 2008 22:46:19 -0700
Subject: lib: export bitrev16

Bluetooth will be able to use this.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitrev.h | 1 +
 lib/bitrev.c           | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h
index 05e540d6963a..7ffe03f4693d 100644
--- a/include/linux/bitrev.h
+++ b/include/linux/bitrev.h
@@ -10,6 +10,7 @@ static inline u8 bitrev8(u8 byte)
 	return byte_rev_table[byte];
 }
 
+extern u16 bitrev16(u16 in);
 extern u32 bitrev32(u32 in);
 
 #endif /* _LINUX_BITREV_H */
diff --git a/lib/bitrev.c b/lib/bitrev.c
index 989aff73f881..3956203456d4 100644
--- a/lib/bitrev.c
+++ b/lib/bitrev.c
@@ -42,10 +42,11 @@ const u8 byte_rev_table[256] = {
 };
 EXPORT_SYMBOL_GPL(byte_rev_table);
 
-static __always_inline u16 bitrev16(u16 x)
+u16 bitrev16(u16 x)
 {
 	return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8);
 }
+EXPORT_SYMBOL(bitrev16);
 
 /**
  * bitrev32 - reverse the order of bits in a u32 value
-- 
cgit v1.2.3


From 93b071139a956e51c98cdefd50a47981a4eb852e Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 5 Jun 2008 22:46:21 -0700
Subject: introduce memory_read_from_buffer()

This patch introduces memory_read_from_buffer().

The only difference between memory_read_from_buffer() and
simple_read_from_buffer() is which address space the function copies to.

simple_read_from_buffer copies to user space memory.
memory_read_from_buffer copies to normal memory.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Doug Warzecha <Douglas_Warzecha@dell.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Cc: Abhay Salunke <Abhay_Salunke@dell.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Markus Rechberger <markus.rechberger@amd.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Bob Moore <robert.moore@intel.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Len Brown <lenb@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Cc: Michael Holzheu <holzheu@de.ibm.com>
Cc: Brian King <brking@us.ibm.com>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Andrew Vasquez <linux-driver@qlogic.com>
Cc: Seokmann Ju <seokmann.ju@qlogic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/libfs.c         | 18 ++++++++++++++++++
 include/linux/fs.h |  5 ++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index b004dfadd891..892d41cb3382 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -528,6 +528,23 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
 	return count;
 }
 
+ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+				const void *from, size_t available)
+{
+	loff_t pos = *ppos;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= available)
+		return 0;
+	if (count > available - pos)
+		count = available - pos;
+	memcpy(to, from + pos, count);
+	*ppos = pos + count;
+
+	return count;
+}
+
 /*
  * Transaction based IO.
  * The file expects a single write which triggers the transaction, and then
@@ -800,6 +817,7 @@ EXPORT_SYMBOL(simple_statfs);
 EXPORT_SYMBOL(simple_sync_file);
 EXPORT_SYMBOL(simple_unlink);
 EXPORT_SYMBOL(simple_read_from_buffer);
+EXPORT_SYMBOL(memory_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f413085f748e..d490779f18d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2000,7 +2000,10 @@ extern int simple_fill_super(struct super_block *, int, struct tree_descr *);
 extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
 extern void simple_release_fs(struct vfsmount **mount, int *count);
 
-extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t);
+extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
+			loff_t *ppos, const void *from, size_t available);
+extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+			const void *from, size_t available);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-- 
cgit v1.2.3


From 68aa0a206a7a2dd8655a50b36e8274eb87b84544 Mon Sep 17 00:00:00 2001
From: Nadia Derbey <Nadia.Derbey@bull.net>
Date: Thu, 5 Jun 2008 22:46:36 -0700
Subject: ipc: restore MSGPOOL original value

When posting:

	[PATCH 1/8] Scaling msgmni to the amount of lowmem

(see http://article.gmane.org/gmane.linux.kernel/637849/) I changed the
MSGPOOL value to make it fit what is said in the man pages (i.e.  a size
in bytes).

But Michael Kerrisk rightly complained that this change could affect the
ABI.  So I'm posting this patch to make MSGPOOL expressed back in Kbytes.
Michael, on his side, has fixed the man page.

Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Pierre Peiffer <peifferp@gmail.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Acked-by: Michael Kerrisk <mtk.manpages@googlemail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 6f3b8e79a991..56abf1558fdd 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -64,11 +64,11 @@ struct msginfo {
 #define MSGMNB 16384   /* <= INT_MAX */   /* default max size of a message queue */
 
 /* unused */
-#define MSGPOOL (MSGMNI * MSGMNB) /* size in bytes of message pool */
+#define MSGPOOL (MSGMNI * MSGMNB / 1024) /* size in kbytes of message pool */
 #define MSGTQL  MSGMNB            /* number of system message headers */
 #define MSGMAP  MSGMNB            /* number of entries in message map */
 #define MSGSSZ  16                /* message segment size */
-#define __MSGSEG (MSGPOOL / MSGSSZ) /* max no. of segments */
+#define __MSGSEG ((MSGPOOL * 1024) / MSGSSZ) /* max no. of segments */
 #define MSGSEG (__MSGSEG <= 0xffff ? __MSGSEG : 0xffff)
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 979b0fea2d9ae5d57237a368d571cbc84655fba6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 5 Jun 2008 22:47:00 -0700
Subject: vm: add kzalloc_node() inline

To get zeroed out memory from a particular NUMA node.  To be used by
sunrpc.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 805ed4b92f9a..c2ad35016599 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -276,6 +276,17 @@ static inline void *kzalloc(size_t size, gfp_t flags)
 	return kmalloc(size, flags | __GFP_ZERO);
 }
 
+/**
+ * kzalloc_node - allocate zeroed memory from a particular memory node.
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kmalloc).
+ * @node: memory node from which to allocate
+ */
+static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
+{
+	return kmalloc_node(size, flags | __GFP_ZERO, node);
+}
+
 #ifdef CONFIG_SLABINFO
 extern const struct seq_operations slabinfo_op;
 ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
-- 
cgit v1.2.3


From f751aa125d1843ea4a9a264b451fd5b1639fab20 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Sun, 8 Jun 2008 21:43:10 +0300
Subject: fat_valid_media() isn't for userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 73f20e58b1d586e9f6d3ddc3aad872829aca7743 ("FAT_VALID_MEDIA():
remove pointless test") wrongly added the new fat_valid_media() function
to the userspace-visible part of include/linux/msdos_fs.h

Move it to the part of include/linux/msdos_fs.h that is not exported to
userspace.

Reported-by: Onur Küçük <onur@pardus.org.tr>
Reported-by: S.Çağlar Onur <caglar@pardus.org.tr>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msdos_fs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index b03b27457413..81cd36b735b0 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -57,12 +57,6 @@
 #define MSDOS_DOT	".          "	/* ".", padded to MSDOS_NAME chars */
 #define MSDOS_DOTDOT	"..         "	/* "..", padded to MSDOS_NAME chars */
 
-/* media of boot sector */
-static inline int fat_valid_media(u8 media)
-{
-	return 0xf8 <= media || media == 0xf0;
-}
-
 #define FAT_FIRST_ENT(s, x)	((MSDOS_SB(s)->fat_bits == 32 ? 0x0FFFFF00 : \
 	MSDOS_SB(s)->fat_bits == 16 ? 0xFF00 : 0xF00) | (x))
 
@@ -334,6 +328,12 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
 #endif
 }
 
+/* media of boot sector */
+static inline int fat_valid_media(u8 media)
+{
+	return 0xf8 <= media || media == 0xf0;
+}
+
 /* fat/cache.c */
 extern void fat_cache_inval_inode(struct inode *inode);
 extern int fat_get_cluster(struct inode *inode, int cluster,
-- 
cgit v1.2.3


From 0d5799449f0f373ca12681d86c941ae464146a37 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Wed, 4 Jun 2008 08:30:54 +1000
Subject: [POWERPC] Make walk_memory_resource available with MEMORY_HOTPLUG=n

The ehea driver was recently changed[1] to use walk_memory_resource() to
detect the system's memory layout.  However, walk_memory_resource() is
available only when memory hotplug is enabled.  So CONFIG_EHEA was
made to depend on MEMORY_HOTPLUG [2], but it is inappropriate for a
network driver to have such a dependency.

Make the declaration of walk_memory_resource() and its powerpc
implementation (ehea is powerpc-specific) unconditionally available.

[1] 48cfb14f8b89d4d5b3df6c16f08b258686fb12ad
    "ehea: Add DLPAR memory remove support"

[2] fb7b6ca2b6b7c23b52be143bdd5f55a23b9780c8
    "ehea: Add dependency to Kconfig"

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Acked-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/mm/mem.c          |  3 +--
 include/linux/memory_hotplug.h | 16 ++++++++--------
 2 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f67e118116fa..51f82d83bf14 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -151,6 +151,7 @@ out:
 	return ret;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
  * walk_memory_resource() needs to make sure there is no holes in a given
@@ -184,8 +185,6 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
 }
 EXPORT_SYMBOL_GPL(walk_memory_resource);
 
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
 void show_mem(void)
 {
 	unsigned long total = 0, reserved = 0;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 73e358612eaf..ea9f5ad9ec8e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -77,14 +77,6 @@ extern int __add_pages(struct zone *zone, unsigned long start_pfn,
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 
-/*
- * Walk through all memory which is registered as resource.
- * arg is (start_pfn, nr_pages, private_arg_pointer)
- */
-extern int walk_memory_resource(unsigned long start_pfn,
-			unsigned long nr_pages, void *arg,
-			int (*func)(unsigned long, unsigned long, void *));
-
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
 #else
@@ -199,6 +191,14 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
+/*
+ * Walk through all memory which is registered as resource.
+ * arg is (start_pfn, nr_pages, private_arg_pointer)
+ */
+extern int walk_memory_resource(unsigned long start_pfn,
+			unsigned long nr_pages, void *arg,
+			int (*func)(unsigned long, unsigned long, void *));
+
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int remove_memory(u64 start, u64 size);
-- 
cgit v1.2.3


From dfa7e20cc0d1a7a620def4dce97de1ae5375f99b Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@sgi.com>
Date: Mon, 9 Jun 2008 11:18:45 -0500
Subject: mm: Minor clean-up of page flags in mm/page_alloc.c

Minor source code cleanup of page flags in mm/page_alloc.c.
Move the definition of the groups of bits to page-flags.h.

The purpose of this clean up is that the next patch will
conditionally add a page flag to the groups.  Doing that
in a header file is cleaner than adding #ifdefs to the
C code.

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 24 ++++++++++++++++++++++++
 mm/page_alloc.c            | 34 +++-------------------------------
 2 files changed, 27 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 590cff32415d..f31debfac926 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -306,5 +306,29 @@ static inline void __ClearPageTail(struct page *page)
 }
 
 #endif /* !PAGEFLAGS_EXTENDED */
+
+#define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
+			 1 << PG_buddy | 1 << PG_writeback | \
+			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active)
+
+/*
+ * Flags checked in bad_page().  Pages on the free list should not have
+ * these flags set.  It they are, there is a problem.
+ */
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+
+/*
+ * Flags checked when a page is freed.  Pages being freed should not have
+ * these flags set.  It they are, there is a problem.
+ */
+#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved)
+
+/*
+ * Flags checked when a page is prepped for return by the page allocator.
+ * Pages being prepped should not have these flags set.  It they are, there
+ * is a problem.
+ */
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+
 #endif /* !__GENERATING_BOUNDS_H */
 #endif	/* PAGE_FLAGS_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e83f02cd2d3..2f552955a02f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -237,16 +237,7 @@ static void bad_page(struct page *page)
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
 		KERN_EMERG "Backtrace:\n");
 	dump_stack();
-	page->flags &= ~(1 << PG_lru	|
-			1 << PG_private |
-			1 << PG_locked	|
-			1 << PG_active	|
-			1 << PG_dirty	|
-			1 << PG_reclaim |
-			1 << PG_slab    |
-			1 << PG_swapcache |
-			1 << PG_writeback |
-			1 << PG_buddy );
+	page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD;
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -463,16 +454,7 @@ static inline int free_pages_check(struct page *page)
 		(page->mapping != NULL)  |
 		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
-		(page->flags & (
-			1 << PG_lru	|
-			1 << PG_private |
-			1 << PG_locked	|
-			1 << PG_active	|
-			1 << PG_slab	|
-			1 << PG_swapcache |
-			1 << PG_writeback |
-			1 << PG_reserved |
-			1 << PG_buddy ))))
+		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
 		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -616,17 +598,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 		(page->mapping != NULL)  |
 		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
-		(page->flags & (
-			1 << PG_lru	|
-			1 << PG_private	|
-			1 << PG_locked	|
-			1 << PG_active	|
-			1 << PG_dirty	|
-			1 << PG_slab    |
-			1 << PG_swapcache |
-			1 << PG_writeback |
-			1 << PG_reserved |
-			1 << PG_buddy ))))
+		(page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
 		bad_page(page);
 
 	/*
-- 
cgit v1.2.3


From 585c5434f0e02ff0ffc567ec223af61e2d8e2e88 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Thu, 5 Jun 2008 21:29:49 +0300
Subject: include/linux/ssb/ssb_driver_gige.h typo fix

This patch fixes a typo in the name of a config variable.

Reported-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Reviewed-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ssb/ssb_driver_gige.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 01fbdf5fef22..942e38736901 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -100,7 +100,7 @@ extern char * nvram_get(const char *name);
 /* Get the device MAC address */
 static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
-#ifdef CONFIG_BCM947XX
+#ifdef CONFIG_BCM47XX
 	char *res = nvram_get("et0macaddr");
 	if (res)
 		memcpy(macaddr, res, 6);
-- 
cgit v1.2.3


From 16882c1e962b4be5122fc05aaf2afc10fd9e2d15 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Sun, 8 Jun 2008 21:20:41 +0400
Subject: sched: fix TASK_WAKEKILL vs SIGKILL race

schedule() has the special "TASK_INTERRUPTIBLE && signal_pending()" case,
this allows us to do

	current->state = TASK_INTERRUPTIBLE;
	schedule();

without fear to sleep with pending signal.

However, the code like

	current->state = TASK_KILLABLE;
	schedule();

is not right, schedule() doesn't take TASK_WAKEKILL into account. This means
that mutex_lock_killable(), wait_for_completion_killable(), down_killable(),
schedule_timeout_killable() can miss SIGKILL (and btw the second SIGKILL has
no effect).

Introduce the new helper, signal_pending_state(), and change schedule() to
use it. Hopefully it will have more users, that is why the task's state is
passed separately.

Note this "__TASK_STOPPED | __TASK_TRACED" check in signal_pending_state().
This is needed to preserve the current behaviour (ptrace_notify). I hope
this check will be removed soon, but this (afaics good) change needs the
separate discussion.

The fast path is "(state & (INTERRUPTIBLE | WAKEKILL)) + signal_pending(p)",
basically the same that schedule() does now. However, this patch of course
bloats schedule().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 13 +++++++++++++
 kernel/sched.c        |  6 ++----
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ae0be3c62375..c5d3f847ca8d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2026,6 +2026,19 @@ static inline int fatal_signal_pending(struct task_struct *p)
 	return signal_pending(p) && __fatal_signal_pending(p);
 }
 
+static inline int signal_pending_state(long state, struct task_struct *p)
+{
+	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
+		return 0;
+	if (!signal_pending(p))
+		return 0;
+
+	if (state & (__TASK_STOPPED | __TASK_TRACED))
+		return 0;
+
+	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
+}
+
 static inline int need_resched(void)
 {
 	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
diff --git a/kernel/sched.c b/kernel/sched.c
index bfb8ad8ed171..2c65bf29d133 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4159,12 +4159,10 @@ need_resched_nonpreemptible:
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				signal_pending(prev))) {
+		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
-		} else {
+		else
 			deactivate_task(rq, prev, 1);
-		}
 		switch_count = &prev->nvcsw;
 	}
 
-- 
cgit v1.2.3


From b76916462d990751882eaeadc75ac8c487d6de1d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 10 Jun 2008 20:56:36 +0200
Subject: ide: remove the ide_etrax100 chipset type

I forgot to remove the ide_etrax100 chipset type when removing the
ETRAX_IDE driver.

Reported-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c | 5 ++---
 drivers/ide/ide-proc.c  | 1 -
 include/linux/ide.h     | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 655ec7ef568a..0bccb63d10a1 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1333,8 +1333,7 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
 static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 			  const struct ide_port_info *d)
 {
-	if (d->chipset != ide_etrax100)
-		hwif->channel = port;
+	hwif->channel = port;
 
 	if (d->chipset)
 		hwif->chipset = d->chipset;
@@ -1519,7 +1518,7 @@ int ide_device_add_all(u8 *idx, const struct ide_port_info *d)
 			continue;
 		}
 
-		if (d->chipset != ide_etrax100 && (i & 1) && mate) {
+		if ((i & 1) && mate) {
 			hwif->mate = mate;
 			mate->mate = hwif;
 		}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 8d6ad812a014..55ec7f798772 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -63,7 +63,6 @@ static int proc_ide_read_imodel
 	case ide_pmac:		name = "mac-io";	break;
 	case ide_au1xxx:	name = "au1xxx";	break;
 	case ide_palm3710:      name = "palm3710";      break;
-	case ide_etrax100:	name = "etrax100";	break;
 	case ide_acorn:		name = "acorn";		break;
 	default:		name = "(unknown)";	break;
 	}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f8f195c20da2..9918772bf274 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -153,7 +153,7 @@ enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
 		ide_rz1000,	ide_trm290,
 		ide_cmd646,	ide_cy82c693,	ide_4drives,
-		ide_pmac,	ide_etrax100,	ide_acorn,
+		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
 
-- 
cgit v1.2.3


From ce4a7d0d48bbaed78ccbb0bafb9229651a40303a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Jun 2008 12:39:35 -0700
Subject: inet{6}_request_sock: Init ->opt and ->pktopts in the constructor

Wei Yongjun noticed that we may call reqsk_free on request sock objects where
the opt fields may not be initialized, fix it by introducing inet_reqsk_alloc
where we initialize ->opt to NULL and set ->pktopts to NULL in
inet6_reqsk_alloc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h    |  4 +++-
 include/net/inet_sock.h | 10 ++++++++++
 net/dccp/ipv4.c         |  3 +--
 net/dccp/ipv6.c         |  1 -
 net/ipv4/syncookies.c   |  3 +--
 net/ipv4/tcp_ipv4.c     |  2 +-
 net/ipv6/syncookies.c   |  1 -
 net/ipv6/tcp_ipv6.c     |  1 -
 8 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 10b666b61add..cde056e08181 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -396,8 +396,10 @@ static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *op
 {
 	struct request_sock *req = reqsk_alloc(ops);
 
-	if (req != NULL)
+	if (req != NULL) {
 		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
+		inet6_rsk(req)->pktopts = NULL;
+	}
 
 	return req;
 }
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a42cd63d241a..9fabe5b38912 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -197,4 +197,14 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return skb->rtable->rt_iif;
 }
 
+static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
+{
+	struct request_sock *req = reqsk_alloc(ops);
+
+	if (req != NULL)
+		inet_rsk(req)->opt = NULL;
+
+	return req;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index c22a3780c14e..37d27bcb361f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = ip_hdr(skb)->daddr;
 	ireq->rmt_addr = ip_hdr(skb)->saddr;
-	ireq->opt	= NULL;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9b1129bb7ece..f7fe2a572d7b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq6 = inet6_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
-	ireq6->pktopts	= NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		cookie_check_timestamp(&tcp_opt);
 
 	ret = NULL;
-	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->opt		= NULL;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..4f8485c67d1a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
 		goto drop;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 938ce4ecde55..3ecc1157994e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
-	ireq6->pktopts = NULL;
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac0..cb46749d4c32 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	treq->pktopts = NULL;
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
-- 
cgit v1.2.3


From 2506ece0c0bbd2fc19a4827b96dc52ea47e2ce4a Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Sun, 8 Jun 2008 20:49:59 +1000
Subject: virtio: Fix typo in virtio_net_hdr comments

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/virtio_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 9405aa6cdf26..38c0571820fb 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -38,7 +38,7 @@ struct virtio_net_hdr
 #define VIRTIO_NET_HDR_GSO_ECN		0x80	// TCP has ECN set
 	__u8 gso_type;
 	__u16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
-	__u16 gso_size;		/* Bytes to append to gso_hdr_len per frame */
+	__u16 gso_size;		/* Bytes to append to hdr_len per frame */
 	__u16 csum_start;	/* Position to start checksumming from */
 	__u16 csum_offset;	/* Offset after that to place checksum */
 };
-- 
cgit v1.2.3


From 709772e6e06564ed94ba740de70185ac3d792773 Mon Sep 17 00:00:00 2001
From: Krzysztof Piotr Oledzki <ole@ans.pl>
Date: Tue, 10 Jun 2008 15:44:49 -0700
Subject: net: Fix routing tables with id > 255 for legacy software

Most legacy software do not like tables > 255 as rtm_table is u8
so tb_id is sent &0xff and it is possible to mismatch for example
table 510 with table 254 (main).

This patch introduces RT_TABLE_COMPAT=252 so the code uses it if
tb_id > 255. It makes such old applications happy, new
ones are still able to use RTA_TABLE to get a proper table id.

Signed-off-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 net/ipv4/fib_semantics.c  | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a2aec2c0cfb5..b358c704d102 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -246,6 +246,7 @@ enum rt_class_t
 {
 	RT_TABLE_UNSPEC=0,
 /* User defined values */
+	RT_TABLE_COMPAT=252,
 	RT_TABLE_DEFAULT=253,
 	RT_TABLE_MAIN=254,
 	RT_TABLE_LOCAL=255,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019fc..0d4d72827e4b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
-	rtm->rtm_table = tb_id;
+	if (tb_id < 256)
+		rtm->rtm_table = tb_id;
+	else
+		rtm->rtm_table = RT_TABLE_COMPAT;
 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
-- 
cgit v1.2.3


From dcb84f335bee9c9a7781cfc5d74492dccaf066d2 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 19 May 2008 19:09:27 -0400
Subject: cpuidle acpi driver: fix oops on AC<->DC

cpuidle and acpi driver interaction bug with the way cpuidle_register_driver()
is called. Due to this bug, there will be oops on
AC<->DC on some systems, where they support C-states in one DC and not in AC.

The current code does
ON BOOT:
	Look at CST and other C-state info to see whether more than C1 is
	supported. If it is, then acpi processor_idle does a
	cpuidle_register_driver() call, which internally enables the device.

ON CST change notification (AC<->DC) and on suspend-resume:
	acpi driver temporarily disables device, updates the device with
	any new C-states, and reenables the device.

The problem is is on boot, there are no C2, C3 states supported and we skip
the register. Later on AC<->DC, we may get a CST notification and we try
to reevaluate CST and enabled the device, without actually registering it.
This causes breakage as we try to create /sys fs sub directory, without the
parent directory which is created at register time.

Thanks to Sanjeev for reporting the problem here.
http://bugzilla.kernel.org/show_bug.cgi?id=10394

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_idle.c | 13 +++++++------
 drivers/cpuidle/cpuidle.c     | 40 +++++++++++++++++++++++++++++++++++-----
 include/linux/cpuidle.h       |  1 +
 3 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2dd2c1f3a01c..556ee1585192 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1669,6 +1669,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 		return -EINVAL;
 	}
 
+	dev->cpu = pr->id;
 	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
 		dev->states[i].name[0] = '\0';
 		dev->states[i].desc[0] = '\0';
@@ -1738,7 +1739,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 
 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 {
-	int ret;
+	int ret = 0;
 
 	if (boot_option_idle_override)
 		return 0;
@@ -1756,8 +1757,10 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 	cpuidle_pause_and_lock();
 	cpuidle_disable_device(&pr->power.dev);
 	acpi_processor_get_power_info(pr);
-	acpi_processor_setup_cpuidle(pr);
-	ret = cpuidle_enable_device(&pr->power.dev);
+	if (pr->flags.power) {
+		acpi_processor_setup_cpuidle(pr);
+		ret = cpuidle_enable_device(&pr->power.dev);
+	}
 	cpuidle_resume_and_unlock();
 
 	return ret;
@@ -1813,7 +1816,6 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 	if (pr->flags.power) {
 #ifdef CONFIG_CPU_IDLE
 		acpi_processor_setup_cpuidle(pr);
-		pr->power.dev.cpu = pr->id;
 		if (cpuidle_register_device(&pr->power.dev))
 			return -EIO;
 #endif
@@ -1850,8 +1852,7 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
 		return 0;
 
 #ifdef CONFIG_CPU_IDLE
-	if (pr->flags.power)
-		cpuidle_unregister_device(&pr->power.dev);
+	cpuidle_unregister_device(&pr->power.dev);
 #endif
 	pr->flags.power_setup_done = 0;
 
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index fc555a90bb21..23554b676d6e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -38,6 +38,8 @@ static void cpuidle_kick_cpus(void)
 static void cpuidle_kick_cpus(void) {}
 #endif
 
+static int __cpuidle_register_device(struct cpuidle_device *dev);
+
 /**
  * cpuidle_idle_call - the main idle loop
  *
@@ -138,6 +140,12 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
 	if (!dev->state_count)
 		return -EINVAL;
 
+	if (dev->registered == 0) {
+		ret = __cpuidle_register_device(dev);
+		if (ret)
+			return ret;
+	}
+
 	if ((ret = cpuidle_add_state_sysfs(dev)))
 		return ret;
 
@@ -232,10 +240,13 @@ static void poll_idle_init(struct cpuidle_device *dev) {}
 #endif /* CONFIG_ARCH_HAS_CPU_RELAX */
 
 /**
- * cpuidle_register_device - registers a CPU's idle PM feature
+ * __cpuidle_register_device - internal register function called before register
+ * and enable routines
  * @dev: the cpu
+ *
+ * cpuidle_lock mutex must be held before this is called
  */
-int cpuidle_register_device(struct cpuidle_device *dev)
+static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
 	int ret;
 	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
@@ -247,18 +258,34 @@ int cpuidle_register_device(struct cpuidle_device *dev)
 
 	init_completion(&dev->kobj_unregister);
 
-	mutex_lock(&cpuidle_lock);
-
 	poll_idle_init(dev);
 
 	per_cpu(cpuidle_devices, dev->cpu) = dev;
 	list_add(&dev->device_list, &cpuidle_detected_devices);
 	if ((ret = cpuidle_add_sysfs(sys_dev))) {
-		mutex_unlock(&cpuidle_lock);
 		module_put(cpuidle_curr_driver->owner);
 		return ret;
 	}
 
+	dev->registered = 1;
+	return 0;
+}
+
+/**
+ * cpuidle_register_device - registers a CPU's idle PM feature
+ * @dev: the cpu
+ */
+int cpuidle_register_device(struct cpuidle_device *dev)
+{
+	int ret;
+
+	mutex_lock(&cpuidle_lock);
+
+	if ((ret = __cpuidle_register_device(dev))) {
+		mutex_unlock(&cpuidle_lock);
+		return ret;
+	}
+
 	cpuidle_enable_device(dev);
 	cpuidle_install_idle_handler();
 
@@ -278,6 +305,9 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
 {
 	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
 
+	if (dev->registered == 0)
+		return;
+
 	cpuidle_pause_and_lock();
 
 	cpuidle_disable_device(dev);
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 51e6b1e520e6..dcf77fa826b5 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -82,6 +82,7 @@ struct cpuidle_state_kobj {
 };
 
 struct cpuidle_device {
+	unsigned int		registered:1;
 	unsigned int		enabled:1;
 	unsigned int		cpu;
 
-- 
cgit v1.2.3


From e9fe9e188118a0a34c6200d9b10ea6247f53592d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 9 Jun 2008 16:52:04 -0700
Subject: pnpacpi: fix IRQ flag decoding

When decoding IRQ trigger mode and polarity, it is not enough to mask by
IORESOURCE_BITS because there are now additional bits defined.  For
example, if IORESOURCE_IRQ_SHAREABLE was set, we failed to set *triggering
and *polarity at all.

I can't point to a failure that this patch fixes, but
bugs in this area have caused problems when resuming after
suspend, for example:

    http://bugzilla.kernel.org/show_bug.cgi?id=6316
    http://bugzilla.kernel.org/show_bug.cgi?id=9487
    https://bugs.launchpad.net/ubuntu/+source/linux-source-2.6.22/+bug/152187

This is based on a patch by Tom Jaeger:
    http://bugzilla.kernel.org/show_bug.cgi?id=9487#c32

[rene.herman@keyaccess.nl: fix comment]
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/pnp/pnpacpi/rsparser.c | 16 ++++++++++++----
 include/linux/ioport.h         |  6 +++---
 2 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 0201c8adfda7..ab09fe6fe1e4 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -56,9 +56,11 @@ static int irq_flags(int triggering, int polarity, int shareable)
 	return flags;
 }
 
-static void decode_irq_flags(int flag, int *triggering, int *polarity)
+static void decode_irq_flags(struct pnp_dev *dev, int flags, int *triggering,
+			     int *polarity)
 {
-	switch (flag) {
+	switch (flags & (IORESOURCE_IRQ_LOWLEVEL | IORESOURCE_IRQ_HIGHLEVEL |
+			 IORESOURCE_IRQ_LOWEDGE  | IORESOURCE_IRQ_HIGHEDGE)) {
 	case IORESOURCE_IRQ_LOWLEVEL:
 		*triggering = ACPI_LEVEL_SENSITIVE;
 		*polarity = ACPI_ACTIVE_LOW;
@@ -75,6 +77,12 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
 		*triggering = ACPI_EDGE_SENSITIVE;
 		*polarity = ACPI_ACTIVE_HIGH;
 		break;
+	default:
+		dev_err(&dev->dev, "can't encode invalid IRQ mode %#x\n",
+			flags);
+		*triggering = ACPI_EDGE_SENSITIVE;
+		*polarity = ACPI_ACTIVE_HIGH;
+		break;
 	}
 }
 
@@ -790,7 +798,7 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev,
 	struct acpi_resource_irq *irq = &resource->data.irq;
 	int triggering, polarity;
 
-	decode_irq_flags(p->flags & IORESOURCE_BITS, &triggering, &polarity);
+	decode_irq_flags(dev, p->flags, &triggering, &polarity);
 	irq->triggering = triggering;
 	irq->polarity = polarity;
 	if (triggering == ACPI_EDGE_SENSITIVE)
@@ -813,7 +821,7 @@ static void pnpacpi_encode_ext_irq(struct pnp_dev *dev,
 	struct acpi_resource_extended_irq *extended_irq = &resource->data.extended_irq;
 	int triggering, polarity;
 
-	decode_irq_flags(p->flags & IORESOURCE_BITS, &triggering, &polarity);
+	decode_irq_flags(dev, p->flags, &triggering, &polarity);
 	extended_irq->producer_consumer = ACPI_CONSUMER;
 	extended_irq->triggering = triggering;
 	extended_irq->polarity = polarity;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index d5d40a9f7929..c6801bffe76d 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -53,14 +53,14 @@ struct resource_list {
 #define IORESOURCE_AUTO		0x40000000
 #define IORESOURCE_BUSY		0x80000000	/* Driver has marked this resource busy */
 
-/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */
+/* PnP IRQ specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IRQ_HIGHEDGE		(1<<0)
 #define IORESOURCE_IRQ_LOWEDGE		(1<<1)
 #define IORESOURCE_IRQ_HIGHLEVEL	(1<<2)
 #define IORESOURCE_IRQ_LOWLEVEL		(1<<3)
 #define IORESOURCE_IRQ_SHAREABLE	(1<<4)
 
-/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
+/* PnP DMA specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_DMA_TYPE_MASK	(3<<0)
 #define IORESOURCE_DMA_8BIT		(0<<0)
 #define IORESOURCE_DMA_8AND16BIT	(1<<0)
@@ -76,7 +76,7 @@ struct resource_list {
 #define IORESOURCE_DMA_TYPEB		(2<<6)
 #define IORESOURCE_DMA_TYPEF		(3<<6)
 
-/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */
+/* PnP memory I/O specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_MEM_WRITEABLE	(1<<0)	/* dup: IORESOURCE_READONLY */
 #define IORESOURCE_MEM_CACHEABLE	(1<<1)	/* dup: IORESOURCE_CACHEABLE */
 #define IORESOURCE_MEM_RANGELENGTH	(1<<2)	/* dup: IORESOURCE_RANGELENGTH */
-- 
cgit v1.2.3


From 45aec1ae72fc592f231e9e73ed9ed4d10cfbc0b5 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Tue, 18 Mar 2008 17:00:22 -0700
Subject: x86: PAT export resource_wc in pci sysfs

For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in
pci /sysfs along with resource (which is uncached).

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/filesystems/sysfs-pci.txt |  1 +
 drivers/pci/pci-sysfs.c                 | 84 ++++++++++++++++++++++++---------
 include/linux/pci.h                     |  1 +
 3 files changed, 64 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 5daa2aaec2c5..68ef48839c04 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -36,6 +36,7 @@ files, each with their own function.
        local_cpus	   nearby CPU mask (cpumask, ro)
        resource		   PCI resource host addresses (ascii, ro)
        resource0..N	   PCI resource N, if present (binary, mmap)
+       resource0_wc..N_wc  PCI WC map resource N, if prefetchable (binary, mmap)
        rom		   PCI ROM resource, if present (binary, ro)
        subsystem_device	   PCI subsystem device (ascii, ro)
        subsystem_vendor	   PCI subsystem vendor (ascii, ro)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 271d41cc05ab..9ec7d3977a82 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -489,13 +489,14 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
  * @kobj: kobject for mapping
  * @attr: struct bin_attribute for the file being mapped
  * @vma: struct vm_area_struct passed into the mmap
+ * @write_combine: 1 for write_combine mapping
  *
  * Use the regular PCI mapping routines to map a PCI resource into userspace.
  * FIXME: write combining?  maybe automatic for prefetchable regions?
  */
 static int
 pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
-		  struct vm_area_struct *vma)
+		  struct vm_area_struct *vma, int write_combine)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
 						       struct device, kobj));
@@ -518,7 +519,21 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	vma->vm_pgoff += start >> PAGE_SHIFT;
 	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
 
-	return pci_mmap_page_range(pdev, vma, mmap_type, 0);
+	return pci_mmap_page_range(pdev, vma, mmap_type, write_combine);
+}
+
+static int
+pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr,
+		     struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 0);
+}
+
+static int
+pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr,
+		     struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 1);
 }
 
 /**
@@ -541,9 +556,46 @@ pci_remove_resource_files(struct pci_dev *pdev)
 			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
 			kfree(res_attr);
 		}
+
+		res_attr = pdev->res_attr_wc[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
 	}
 }
 
+static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
+{
+	/* allocate attribute structure, piggyback attribute name */
+	int name_len = write_combine ? 13 : 10;
+	struct bin_attribute *res_attr;
+	int retval;
+
+	res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC);
+	if (res_attr) {
+		char *res_attr_name = (char *)(res_attr + 1);
+
+		if (write_combine) {
+			pdev->res_attr_wc[num] = res_attr;
+			sprintf(res_attr_name, "resource%d_wc", num);
+			res_attr->mmap = pci_mmap_resource_wc;
+		} else {
+			pdev->res_attr[num] = res_attr;
+			sprintf(res_attr_name, "resource%d", num);
+			res_attr->mmap = pci_mmap_resource_uc;
+		}
+		res_attr->attr.name = res_attr_name;
+		res_attr->attr.mode = S_IRUSR | S_IWUSR;
+		res_attr->size = pci_resource_len(pdev, num);
+		res_attr->private = &pdev->resource[num];
+		retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+	} else
+		retval = -ENOMEM;
+
+	return retval;
+}
+
 /**
  * pci_create_resource_files - create resource files in sysfs for @dev
  * @dev: dev in question
@@ -557,31 +609,19 @@ static int pci_create_resource_files(struct pci_dev *pdev)
 
 	/* Expose the PCI resources from this device as files */
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-		struct bin_attribute *res_attr;
 
 		/* skip empty resources */
 		if (!pci_resource_len(pdev, i))
 			continue;
 
-		/* allocate attribute structure, piggyback attribute name */
-		res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC);
-		if (res_attr) {
-			char *res_attr_name = (char *)(res_attr + 1);
-
-			pdev->res_attr[i] = res_attr;
-			sprintf(res_attr_name, "resource%d", i);
-			res_attr->attr.name = res_attr_name;
-			res_attr->attr.mode = S_IRUSR | S_IWUSR;
-			res_attr->size = pci_resource_len(pdev, i);
-			res_attr->mmap = pci_mmap_resource;
-			res_attr->private = &pdev->resource[i];
-			retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
-			if (retval) {
-				pci_remove_resource_files(pdev);
-				return retval;
-			}
-		} else {
-			return -ENOMEM;
+		retval = pci_create_attr(pdev, i, 0);
+		/* for prefetchable resources, create a WC mappable file */
+		if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH)
+			retval = pci_create_attr(pdev, i, 1);
+
+		if (retval) {
+			pci_remove_resource_files(pdev);
+			return retval;
 		}
 	}
 	return 0;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 509159bcd4e7..d18b1dd49fab 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -206,6 +206,7 @@ struct pci_dev {
 	struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
 	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
+	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
 #endif
-- 
cgit v1.2.3


From f595ec964daf7f99668039d7303ddedd09a75142 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 12 Jun 2008 10:47:56 +0200
Subject: common implementation of iterative div/mod

We have a few instances of the open-coded iterative div/mod loop, used
when we don't expcet the dividend to be much bigger than the divisor.
Unfortunately modern gcc's have the tendency to strength "reduce" this
into a full mod operation, which isn't necessarily any faster, and
even if it were, doesn't exist if gcc implements it in libgcc.

The workaround is to put a dummy asm statement in the loop to prevent
gcc from performing the transformation.

This patch creates a single implementation of this loop, and uses it
to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/time.c    | 13 +++----------
 include/linux/math64.h |  2 ++
 include/linux/time.h   | 11 ++---------
 lib/div64.c            | 23 +++++++++++++++++++++++
 4 files changed, 30 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5aa241..52b2e3856980 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -12,6 +12,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
+#include <linux/math64.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -150,11 +151,7 @@ static void do_stolen_accounting(void)
 	if (stolen < 0)
 		stolen = 0;
 
-	ticks = 0;
-	while (stolen >= NS_PER_TICK) {
-		ticks++;
-		stolen -= NS_PER_TICK;
-	}
+	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
 	account_steal_time(NULL, ticks);
 
@@ -166,11 +163,7 @@ static void do_stolen_accounting(void)
 	if (blocked < 0)
 		blocked = 0;
 
-	ticks = 0;
-	while (blocked >= NS_PER_TICK) {
-		ticks++;
-		blocked -= NS_PER_TICK;
-	}
+	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
 	account_steal_time(idle_task(smp_processor_id()), ticks);
 }
diff --git a/include/linux/math64.h b/include/linux/math64.h
index c1a5f81501ff..177785e1e4a3 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
 }
 #endif
 
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
+
 #endif /* _LINUX_MATH64_H */
diff --git a/include/linux/time.h b/include/linux/time.h
index d32ef0ad4c0a..05f9517a8ed1 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -6,6 +6,7 @@
 #ifdef __KERNEL__
 # include <linux/cache.h>
 # include <linux/seqlock.h>
+# include <linux/math64.h>
 #endif
 
 #ifndef _STRUCT_TIMESPEC
@@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec);
  */
 static inline void timespec_add_ns(struct timespec *a, u64 ns)
 {
-	ns += a->tv_nsec;
-	while(unlikely(ns >= NSEC_PER_SEC)) {
-		/* The following asm() prevents the compiler from
-		 * optimising this loop into a modulo operation.  */
-		asm("" : "+r"(ns));
-
-		ns -= NSEC_PER_SEC;
-		a->tv_sec++;
-	}
+	a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
 	a->tv_nsec = ns;
 }
 #endif /* __KERNEL__ */
diff --git a/lib/div64.c b/lib/div64.c
index bb5bd0c0f030..76c01542d3e1 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64);
 #endif
 
 #endif /* BITS_PER_LONG == 32 */
+
+/*
+ * Iterative div/mod for use when dividend is not expected to be much
+ * bigger than divisor.
+ */
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+	u32 ret = 0;
+
+	while (dividend >= divisor) {
+		/* The following asm() prevents the compiler from
+		   optimising this loop into a modulo operation.  */
+		asm("" : "+rm"(dividend));
+
+		dividend -= divisor;
+		ret++;
+	}
+
+	*remainder = dividend;
+
+	return ret;
+}
+EXPORT_SYMBOL(iter_div_u64_rem);
-- 
cgit v1.2.3


From d5e181f78ac753893eb930868a52a4488cd3de0a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 12 Jun 2008 10:47:58 +0200
Subject: add an inlined version of iter_div_u64_rem

iter_div_u64_rem is used in the x86-64 vdso, which cannot call other
kernel code.  For this case, provide the always_inlined version,
__iter_div_u64_rem.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/math64.h | 19 +++++++++++++++++++
 lib/div64.c            | 15 +--------------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 177785e1e4a3..c87f1528703a 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -83,4 +83,23 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
 
 u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
 
+static __always_inline u32
+__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+	u32 ret = 0;
+
+	while (dividend >= divisor) {
+		/* The following asm() prevents the compiler from
+		   optimising this loop into a modulo operation.  */
+		asm("" : "+rm"(dividend));
+
+		dividend -= divisor;
+		ret++;
+	}
+
+	*remainder = dividend;
+
+	return ret;
+}
+
 #endif /* _LINUX_MATH64_H */
diff --git a/lib/div64.c b/lib/div64.c
index 76c01542d3e1..a111eb8de9cf 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -105,19 +105,6 @@ EXPORT_SYMBOL(div64_u64);
  */
 u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 {
-	u32 ret = 0;
-
-	while (dividend >= divisor) {
-		/* The following asm() prevents the compiler from
-		   optimising this loop into a modulo operation.  */
-		asm("" : "+rm"(dividend));
-
-		dividend -= divisor;
-		ret++;
-	}
-
-	*remainder = dividend;
-
-	return ret;
+	return __iter_div_u64_rem(dividend, divisor, remainder);
 }
 EXPORT_SYMBOL(iter_div_u64_rem);
-- 
cgit v1.2.3


From 9412e28649d0272df5e4af57bb378926fd4df580 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 12 Jun 2008 10:48:00 +0200
Subject: always_inline timespec_add_ns

timespec_add_ns is used from the x86-64 vdso, which cannot call out to
other kernel code.  Make sure that timespec_add_ns is always inlined
(and only uses always_inlined functions) to make sure there are no
unexpected calls.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/time.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 05f9517a8ed1..e15206a7e82e 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -170,10 +170,13 @@ extern struct timeval ns_to_timeval(const s64 nsec);
  * timespec_add_ns - Adds nanoseconds to a timespec
  * @a:		pointer to timespec to be incremented
  * @ns:		unsigned nanoseconds value to be added
+ *
+ * This must always be inlined because its used from the x86-64 vdso,
+ * which cannot call other kernel functions.
  */
-static inline void timespec_add_ns(struct timespec *a, u64 ns)
+static __always_inline void timespec_add_ns(struct timespec *a, u64 ns)
 {
-	a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
+	a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
 	a->tv_nsec = ns;
 }
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From ec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jun 2008 16:31:35 -0700
Subject: tcp: Revert 'process defer accept as established' changes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87
("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and
the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38
("tcp: Fix slab corruption with ipv6 and tcp6fuzz").

This change causes several problems, first reported by Ingo Molnar
as a distcc-over-loopback regression where connections were getting
stuck.

Ilpo Järvinen first spotted the locking problems.  The new function
added by this code, tcp_defer_accept_check(), only has the
child socket locked, yet it is modifying state of the parent
listening socket.

Fixing that is non-trivial at best, because we can't simply just grab
the parent listening socket lock at this point, because it would
create an ABBA deadlock.  The normal ordering is parent listening
socket --> child socket, but this code path would require the
reverse lock ordering.

Next is a problem noticed by Vitaliy Gusev, he noted:

----------------------------------------
>--- a/net/ipv4/tcp_timer.c
>+++ b/net/ipv4/tcp_timer.c
>@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
> 		goto death;
> 	}
>
>+	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
>+		tcp_send_active_reset(sk, GFP_ATOMIC);
>+		goto death;

Here socket sk is not attached to listening socket's request queue. tcp_done()
will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should
release this sk) as socket is not DEAD. Therefore socket sk will be lost for
freeing.
----------------------------------------

Finally, Alexey Kuznetsov argues that there might not even be any
real value or advantage to these new semantics even if we fix all
of the bugs:

----------------------------------------
Hiding from accept() sockets with only out-of-order data only
is the only thing which is impossible with old approach. Is this really
so valuable? My opinion: no, this is nothing but a new loophole
to consume memory without control.
----------------------------------------

So revert this thing for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h             |  7 -------
 include/net/request_sock.h      |  4 ++--
 include/net/tcp.h               |  1 -
 net/ipv4/inet_connection_sock.c | 11 +++++++---
 net/ipv4/tcp.c                  | 18 ++++++++++-------
 net/ipv4/tcp_input.c            | 45 -----------------------------------------
 net/ipv4/tcp_ipv4.c             |  8 --------
 net/ipv4/tcp_minisocks.c        | 32 +++++++++++------------------
 net/ipv4/tcp_timer.c            |  5 -----
 9 files changed, 33 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 18e62e3d406f..b31b6b74aa28 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,11 +239,6 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 	return (struct tcp_request_sock *)req;
 }
 
-struct tcp_deferred_accept_info {
-	struct sock *listen_sk;
-	struct request_sock *request;
-};
-
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
@@ -379,8 +374,6 @@ struct tcp_sock {
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 	int			linger2;
 
-	struct tcp_deferred_accept_info defer_tcp_accept;
-
 	unsigned long last_synq_overflow; 
 
 	u32	tso_deferred;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b220b5f624de..0c96e7bed5db 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -115,8 +115,8 @@ struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
-	u16			rskq_defer_accept;
-	/* 2 bytes hole, try to pack */
+	u8			rskq_defer_accept;
+	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d448310c82c1..cf54034019d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,7 +139,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_KEEPINTVL	32767
 #define MAX_TCP_KEEPCNT		127
 #define MAX_TCP_SYNCNT		127
-#define MAX_TCP_ACCEPT_DEFERRED 65535
 
 #define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..045e799d3e1d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
+				    (inet_rsk(req)->acked ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req))) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b8043..fc54a48fde1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		if (val < 0) {
-			err = -EINVAL;
-		} else {
-			if (val > MAX_TCP_ACCEPT_DEFERRED)
-				val = MAX_TCP_ACCEPT_DEFERRED;
-			icsk->icsk_accept_queue.rskq_defer_accept = val;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2299,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = icsk->icsk_accept_queue.rskq_defer_accept;
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b560..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
-static int tcp_defer_accept_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (tp->defer_tcp_accept.request) {
-		int queued_data =  tp->rcv_nxt - tp->copied_seq;
-		int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ?
-			tcp_hdr((struct sk_buff *)
-				sk->sk_receive_queue.prev)->fin : 0;
-
-		if (queued_data && hasfin)
-			queued_data--;
-
-		if (queued_data &&
-		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-			if (sock_flag(sk, SOCK_KEEPOPEN)) {
-				inet_csk_reset_keepalive_timer(sk,
-							       keepalive_time_when(tp));
-			} else {
-				inet_csk_delete_keepalive_timer(sk);
-			}
-
-			inet_csk_reqsk_queue_add(
-				tp->defer_tcp_accept.listen_sk,
-				tp->defer_tcp_accept.request,
-				sk);
-
-			tp->defer_tcp_accept.listen_sk->sk_data_ready(
-				tp->defer_tcp_accept.listen_sk, 0);
-
-			sock_put(tp->defer_tcp_accept.listen_sk);
-			sock_put(sk);
-			tp->defer_tcp_accept.listen_sk = NULL;
-			tp->defer_tcp_accept.request = NULL;
-		} else if (hasfin ||
-			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-			tcp_reset(sk);
-			return -1;
-		}
-	}
-	return 0;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4944,8 +4901,6 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-
-	tcp_defer_accept_check(sk);
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f8485c67d1a..97a230026e13 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	if (tp->defer_tcp_accept.request) {
-		reqsk_free(tp->defer_tcp_accept.request);
-		sock_put(tp->defer_tcp_accept.listen_sk);
-		sock_put(sk);
-		tp->defer_tcp_accept.listen_sk = NULL;
-		tp->defer_tcp_accept.request = NULL;
-	}
-
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   Both ends (listening sockets) accept the new incoming
-	   connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+	   bare ACK.  Otherwise, we create an established connection.  Both
+	   ends (listening sockets) accept the new incoming connection and try
+	   to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		if (!(flg & TCP_FLAG_ACK))
 			return NULL;
 
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+			inet_rsk(req)->acked = 1;
+			return NULL;
+		}
+
 		/* OK, ACK is valid, create big socket and
 		 * feed this segment to it. It will repeat all
 		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		inet_csk_reqsk_queue_unlink(sk, req, prev);
 		inet_csk_reqsk_queue_removed(sk, req);
 
-		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
-			/* the accept queue handling is done is est recv slow
-			 * path so lets make sure to start there
-			 */
-			tcp_sk(child)->pred_flags = 0;
-			sock_hold(sk);
-			sock_hold(child);
-			tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-			tcp_sk(child)->defer_tcp_accept.request = req;
-
-			inet_csk_reset_keepalive_timer(child,
-						       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-		} else {
-			inet_csk_reqsk_queue_add(sk, req, child);
-		}
-
+		inet_csk_reqsk_queue_add(sk, req, child);
 		return child;
 
 	listen_overflow:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		goto death;
-	}
-
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;
 
-- 
cgit v1.2.3


From 57d3c64fd8130ebdacd85a36c9656ba5e221f3a3 Mon Sep 17 00:00:00 2001
From: Ben Nizette <bn@niasdigital.com>
Date: Thu, 12 Jun 2008 15:21:31 -0700
Subject: proc_fs.h: move struct mm_struct forward-declaration

Move the forward-declaration of struct mm_struct a little way up
proc_fs.h.  This fixes a bunch of "'struct mm_struct' declared inside
parameter list" warnings with CONFIG_PROC_FS=n

Signed-off-by: Ben Nizette <bn@niasdigital.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9883bc942262..fff1d27ddb4c 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -9,6 +9,8 @@
 
 struct net;
 struct completion;
+struct mm_struct;
+
 /*
  * The proc filesystem constants/structures
  */
@@ -101,8 +103,6 @@ extern spinlock_t proc_subdir_lock;
 extern void proc_root_init(void);
 extern void proc_misc_init(void);
 
-struct mm_struct;
-
 void proc_flush_task(struct task_struct *task);
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-- 
cgit v1.2.3


From 24aac480e76c6f5d1391ac05c5e9c0eb9b0cd302 Mon Sep 17 00:00:00 2001
From: Mike Miller <mike.miller@hp.com>
Date: Thu, 12 Jun 2008 15:21:34 -0700
Subject: cciss: add new hardware support

Add support for the next generation of HP Smart Array SAS/SATA
controllers.  Shipping date is late Fall 2008.

Bump the driver version to 3.6.20 to reflect the new hardware support from
patch 1 of this set.

Signed-off-by: Mike Miller <mike.miller@hp.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cciss.txt |  5 +++++
 drivers/block/cciss.c   | 21 ++++++++++++++++-----
 include/linux/pci_ids.h |  1 +
 3 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt
index e65736c6b8bc..63e59b8847c5 100644
--- a/Documentation/cciss.txt
+++ b/Documentation/cciss.txt
@@ -21,6 +21,11 @@ This driver is known to work with the following cards:
 	* SA E200
 	* SA E200i
 	* SA E500
+	* SA P212
+	* SA P410
+	* SA P410i
+	* SA P411
+	* SA P812
 
 Detecting drive failures:
 -------------------------
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e336b05fe4a7..5f1e1cc6165a 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -53,15 +53,16 @@
 #include <linux/scatterlist.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 3.6.14)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(3,6,14)
+#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
 
 /* Embedded module documentation macros - see modules.h */
 MODULE_AUTHOR("Hewlett-Packard Company");
-MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 3.6.14");
+MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 P400 P400i E200 E200i E500");
-MODULE_VERSION("3.6.14");
+			" SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
+			" Smart Array G2 Series SAS/SATA Controllers");
+MODULE_VERSION("3.6.20");
 MODULE_LICENSE("GPL");
 
 #include "cciss_cmd.h"
@@ -90,6 +91,11 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3237},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x323D},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3241},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3243},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
 	{PCI_VENDOR_ID_HP,     PCI_ANY_ID,	PCI_ANY_ID, PCI_ANY_ID,
 		PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
 	{0,}
@@ -123,6 +129,11 @@ static struct board_type products[] = {
 	{0x3215103C, "Smart Array E200i", &SA5_access, 120},
 	{0x3237103C, "Smart Array E500", &SA5_access, 512},
 	{0x323D103C, "Smart Array P700m", &SA5_access, 512},
+	{0x3241103C, "Smart Array P212", &SA5_access, 384},
+	{0x3243103C, "Smart Array P410", &SA5_access, 384},
+	{0x3245103C, "Smart Array P410i", &SA5_access, 384},
+	{0x3247103C, "Smart Array P411", &SA5_access, 384},
+	{0x3249103C, "Smart Array P812", &SA5_access, 384},
 	{0xFFFF103C, "Unknown Smart Array", &SA5_access, 120},
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9b940e644179..eafc9d6d2b35 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -716,6 +716,7 @@
 #define PCI_DEVICE_ID_HP_CISSA		0x3220
 #define PCI_DEVICE_ID_HP_CISSC		0x3230
 #define PCI_DEVICE_ID_HP_CISSD		0x3238
+#define PCI_DEVICE_ID_HP_CISSE		0x323a
 #define PCI_DEVICE_ID_HP_ZX2_IOC	0x4031
 
 #define PCI_VENDOR_ID_PCTECH		0x1042
-- 
cgit v1.2.3


From 2165009bdf63f79716a36ad545df14c3cdf958b7 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Thu, 12 Jun 2008 15:21:47 -0700
Subject: pagemap: pass mm into pagewalkers

We need this at least for huge page detection for now, because powerpc
needs the vm_area_struct to be able to determine whether a virtual address
is referring to a huge page (its pmd_huge() doesn't work).

It might also come in handy for some of the other users.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 44 +++++++++++++++++++++++++-------------------
 include/linux/mm.h | 17 +++++++++--------
 mm/pagewalk.c      | 42 ++++++++++++++++++++++--------------------
 3 files changed, 56 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 17403629e330..f0df3109343d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -315,9 +315,9 @@ struct mem_size_stats {
 };
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			   void *private)
+			   struct mm_walk *walk)
 {
-	struct mem_size_stats *mss = private;
+	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	return 0;
 }
 
-static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
-
 static int show_smap(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *vma = v;
 	struct mem_size_stats mss;
 	int ret;
+	struct mm_walk smaps_walk = {
+		.pmd_entry = smaps_pte_range,
+		.mm = vma->vm_mm,
+		.private = &mss,
+	};
 
 	memset(&mss, 0, sizeof mss);
 	mss.vma = vma;
 	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
-				&smaps_walk, &mss);
+		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
 	ret = show_map(m, v);
 	if (ret)
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = {
 };
 
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
-				unsigned long end, void *private)
+				unsigned long end, struct mm_walk *walk)
 {
-	struct vm_area_struct *vma = private;
+	struct vm_area_struct *vma = walk->private;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
-static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
-
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		return -ESRCH;
 	mm = get_task_mm(task);
 	if (mm) {
+		static struct mm_walk clear_refs_walk;
+		memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
+		clear_refs_walk.pmd_entry = clear_refs_pte_range;
+		clear_refs_walk.mm = mm;
 		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			clear_refs_walk.private = vma;
 			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(mm, vma->vm_start, vma->vm_end,
-						&clear_refs_walk, vma);
+				walk_page_range(vma->vm_start, vma->vm_end,
+						&clear_refs_walk);
+		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
@@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn,
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-				void *private)
+				struct mm_walk *walk)
 {
-	struct pagemapread *pm = private;
+	struct pagemapread *pm = walk->private;
 	unsigned long addr;
 	int err = 0;
 	for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -548,9 +554,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
 }
 
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			     void *private)
+			     struct mm_walk *walk)
 {
-	struct pagemapread *pm = private;
+	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
 
@@ -675,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		 * user buffer is tracked in "pm", and the walk
 		 * will stop when we hit the end of the buffer.
 		 */
-		ret = walk_page_range(mm, start_vaddr, end_vaddr,
-					&pagemap_walk, &pm);
+		ret = walk_page_range(start_vaddr, end_vaddr,
+					&pagemap_walk);
 		if (ret == PM_END_OF_BUFFER)
 			ret = 0;
 		/* don't need mmap_sem for these, but this looks cleaner */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c31a9cd2a30e..586a943cab01 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -760,16 +760,17 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
  * (see walk_page_range for more details)
  */
 struct mm_walk {
-	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
-	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
-	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
-	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
-	int (*pte_hole)(unsigned long, unsigned long, void *);
+	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
+	struct mm_struct *mm;
+	void *private;
 };
 
-int walk_page_range(const struct mm_struct *, unsigned long addr,
-		    unsigned long end, const struct mm_walk *walk,
-		    void *private);
+int walk_page_range(unsigned long addr, unsigned long end,
+		struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 0afd2387e507..d5878bed7841 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,14 +3,14 @@
 #include <linux/sched.h>
 
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pte_t *pte;
 	int err = 0;
 
 	pte = pte_offset_map(pmd, addr);
 	for (;;) {
-		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
+		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
 		if (err)
 		       break;
 		addr += PAGE_SIZE;
@@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 }
 
 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pmd_entry)
-			err = walk->pmd_entry(pmd, addr, next, private);
+			err = walk->pmd_entry(pmd, addr, next, walk);
 		if (!err && walk->pte_entry)
-			err = walk_pte_range(pmd, addr, next, walk, private);
+			err = walk_pte_range(pmd, addr, next, walk);
 		if (err)
 			break;
 	} while (pmd++, addr = next, addr != end);
@@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 }
 
 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pud_entry)
-			err = walk->pud_entry(pud, addr, next, private);
+			err = walk->pud_entry(pud, addr, next, walk);
 		if (!err && (walk->pmd_entry || walk->pte_entry))
-			err = walk_pmd_range(pud, addr, next, walk, private);
+			err = walk_pmd_range(pud, addr, next, walk);
 		if (err)
 			break;
 	} while (pud++, addr = next, addr != end);
@@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
- * @private: private data passed to the callback function
  *
  * Recursively walk the page table for the memory area in a VMA,
  * calling supplied callbacks. Callbacks are called in-order (first
  * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
  * etc.). If lower-level callbacks are omitted, walking depth is reduced.
  *
- * Each callback receives an entry pointer, the start and end of the
- * associated range, and a caller-supplied private data pointer.
+ * Each callback receives an entry pointer and the start and end of the
+ * associated range, and a copy of the original mm_walk for access to
+ * the ->private or ->mm fields.
  *
  * No locks are taken, but the bottom level iterator will map PTE
  * directories from highmem if necessary.
@@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  * If any callback returns a non-zero value, the walk is aborted and
  * the return value is propagated back to the caller. Otherwise 0 is returned.
  */
-int walk_page_range(const struct mm_struct *mm,
-		    unsigned long addr, unsigned long end,
-		    const struct mm_walk *walk, void *private)
+int walk_page_range(unsigned long addr, unsigned long end,
+		    struct mm_walk *walk)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm,
 	if (addr >= end)
 		return err;
 
-	pgd = pgd_offset(mm, addr);
+	if (!walk->mm)
+		return -EINVAL;
+
+	pgd = pgd_offset(walk->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pgd_entry)
-			err = walk->pgd_entry(pgd, addr, next, private);
+			err = walk->pgd_entry(pgd, addr, next, walk);
 		if (!err &&
 		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
-			err = walk_pud_range(pgd, addr, next, walk, private);
+			err = walk_pud_range(pgd, addr, next, walk);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-- 
cgit v1.2.3


From 9a8ea36967afad617d9b0930b6fe7592b9ed9772 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 16 Jun 2008 12:18:24 +0100
Subject: Remove #ifdef CONFIG_ARCH_SUPPORTS_AOUT from <linux/a.out.h>

This file is only included where it makes sense now, so there's no need
for the CONFIG_ARCH_SUPPORTS_AOUT conditional -- and that conditional is
bad, because we want to export <linux/a.out.h> to userspace.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Peter Korsgaard <jacmet@sunsite.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/a.out.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index 208f4e8ed304..e86dfca44589 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -1,8 +1,6 @@
 #ifndef __A_OUT_GNU_H__
 #define __A_OUT_GNU_H__
 
-#ifdef CONFIG_ARCH_SUPPORTS_AOUT
-
 #define __GNU_EXEC_MACROS__
 
 #ifndef __STRUCT_EXEC_OVERRIDE__
@@ -277,10 +275,4 @@ struct relocation_info
 #endif /* no N_RELOCATION_INFO_DECLARED.  */
 
 #endif /*__ASSEMBLY__ */
-#else /* CONFIG_ARCH_SUPPORTS_AOUT */
-#ifndef __ASSEMBLY__
-struct exec {
-};
-#endif
-#endif /* CONFIG_ARCH_SUPPORTS_AOUT */
 #endif /* __A_OUT_GNU_H__ */
-- 
cgit v1.2.3


From e53d6a152793a38aa334d6f7a4850642ae45cedc Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Mon, 16 Jun 2008 12:24:17 +0100
Subject: Export <linux/a.out.h> to userspace again.

This seems to have been removed accidentally in commit
ed7b1889da256977574663689b598d88950bbd23 ("Unexport asm/page.h"), but
wasn't supposed to have been -- the original patch at
http://lkml.org/lkml/2007/10/30/144 just moved it from $(header-y) to
$(unifdef-y)

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Peter Korsgaard <jacmet@sunsite.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 93b98856007a..b6fbb2573e88 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -166,6 +166,9 @@ unifdef-y += acct.h
 unifdef-y += adb.h
 unifdef-y += adfs_fs.h
 unifdef-y += agpgart.h
+ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h)
+unifdef-y += a.out.h
+endif
 unifdef-y += apm_bios.h
 unifdef-y += atalk.h
 unifdef-y += atmdev.h
-- 
cgit v1.2.3


From 2b4743bd6be9fedaa560f8c6dc3997e9ec21b99b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 16 Jun 2008 16:48:20 -0700
Subject: ipv6 sit: Avoid extra need for compat layer in PRL management.

We've introduced extra need of compat layer for ip_tunnel_prl{}
for PRL (Potential Router List) management.  Though compat_ioctl
is still missing in ipv4/ipv6, let's make the interface more
straight-forward and eliminate extra need for nasty compat layer
anyway since the interface is new for 2.6.26.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h |  2 +-
 net/ipv6/sit.c            | 44 ++++++++++++++++++++++++--------------------
 2 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index f1fbe9c930d7..d4efe4014705 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -41,7 +41,7 @@ struct ip_tunnel_prl {
 	__u16			__reserved;
 	__u32			datalen;
 	__u32			__reserved2;
-	void __user		*data;
+	/* data follows */
 };
 
 /* PRL flags */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3de6ffdaedf2..32e871a6c25a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -222,15 +222,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 
 }
 
-static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
+static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
+				struct ip_tunnel_prl __user *a)
 {
-	struct ip_tunnel_prl *kp;
+	struct ip_tunnel_prl kprl, *kp;
 	struct ip_tunnel_prl_entry *prl;
 	unsigned int cmax, c = 0, ca, len;
 	int ret = 0;
 
-	cmax = a->datalen / sizeof(*a);
-	if (cmax > 1 && a->addr != htonl(INADDR_ANY))
+	if (copy_from_user(&kprl, a, sizeof(kprl)))
+		return -EFAULT;
+	cmax = kprl.datalen / sizeof(kprl);
+	if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
 		cmax = 1;
 
 	/* For simple GET or for root users,
@@ -261,26 +264,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 	for (prl = t->prl; prl; prl = prl->next) {
 		if (c > cmax)
 			break;
-		if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr)
+		if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
 			continue;
 		kp[c].addr = prl->addr;
 		kp[c].flags = prl->flags;
 		c++;
-		if (a->addr != htonl(INADDR_ANY))
+		if (kprl.addr != htonl(INADDR_ANY))
 			break;
 	}
 out:
 	read_unlock(&ipip6_lock);
 
 	len = sizeof(*kp) * c;
-	ret = len ? copy_to_user(a->data, kp, len) : 0;
+	ret = 0;
+	if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
+		ret = -EFAULT;
 
 	kfree(kp);
-	if (ret)
-		return -EFAULT;
 
-	a->datalen = len;
-	return 0;
+	return ret;
 }
 
 static int
@@ -873,11 +875,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 		break;
 
 	case SIOCGETPRL:
+		err = -EINVAL;
+		if (dev == sitn->fb_tunnel_dev)
+			goto done;
+		err = -ENOENT;
+		if (!(t = netdev_priv(dev)))
+			goto done;
+		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
+		break;
+
 	case SIOCADDPRL:
 	case SIOCDELPRL:
 	case SIOCCHGPRL:
 		err = -EPERM;
-		if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN))
 			goto done;
 		err = -EINVAL;
 		if (dev == sitn->fb_tunnel_dev)
@@ -890,12 +901,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			goto done;
 
 		switch (cmd) {
-		case SIOCGETPRL:
-			err = ipip6_tunnel_get_prl(t, &prl);
-			if (!err && copy_to_user(ifr->ifr_ifru.ifru_data,
-						 &prl, sizeof(prl)))
-				err = -EFAULT;
-			break;
 		case SIOCDELPRL:
 			err = ipip6_tunnel_del_prl(t, &prl);
 			break;
@@ -904,8 +909,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
 			break;
 		}
-		if (cmd != SIOCGETPRL)
-			netdev_state_change(dev);
+		netdev_state_change(dev);
 		break;
 
 	default:
-- 
cgit v1.2.3


From c72580129209aaa509ace81c1f2ee1caa9c9774b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 26 Mar 2008 14:10:02 -0700
Subject: drivers/char/agp - use bool

Use boolean in AGP instead of having own TRUE/FALSE

--
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/agp.h          |  6 +++---
 drivers/char/agp/alpha-agp.c    |  4 ++--
 drivers/char/agp/amd-k7-agp.c   |  4 ++--
 drivers/char/agp/amd64-agp.c    |  4 ++--
 drivers/char/agp/ati-agp.c      |  4 ++--
 drivers/char/agp/efficeon-agp.c |  6 +++---
 drivers/char/agp/generic.c      | 24 ++++++++++++------------
 drivers/char/agp/hp-agp.c       |  6 +++---
 drivers/char/agp/i460-agp.c     |  2 +-
 drivers/char/agp/intel-agp.c    | 10 +++++-----
 drivers/char/agp/nvidia-agp.c   |  4 ++--
 drivers/char/agp/parisc-agp.c   |  6 +++---
 drivers/char/agp/sgi-agp.c      |  8 ++++----
 drivers/char/agp/sworks-agp.c   |  6 +++---
 drivers/char/agp/uninorth-agp.c | 10 +++++-----
 include/linux/agp_backend.h     | 14 +++-----------
 16 files changed, 55 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 99e6a406efb4..81e14bea54bd 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -99,8 +99,8 @@ struct agp_bridge_driver {
 	const void *aperture_sizes;
 	int num_aperture_sizes;
 	enum aper_size_type size_type;
-	int cant_use_aperture;
-	int needs_scratch_page;
+	bool cant_use_aperture;
+	bool needs_scratch_page;
 	const struct gatt_mask *masks;
 	int (*fetch_size)(void);
 	int (*configure)(void);
@@ -278,7 +278,7 @@ void agp_generic_destroy_page(void *addr, int flags);
 void agp_free_key(int key);
 int agp_num_entries(void);
 u32 agp_collect_device_status(struct agp_bridge_data *bridge, u32 mode, u32 command);
-void agp_device_command(u32 command, int agp_v3);
+void agp_device_command(u32 command, bool agp_v3);
 int agp_3_5_enable(struct agp_bridge_data *bridge);
 void global_cache_flush(void);
 void get_agp_version(struct agp_bridge_data *bridge);
diff --git a/drivers/char/agp/alpha-agp.c b/drivers/char/agp/alpha-agp.c
index e77c17838c8a..5da89f6c6c25 100644
--- a/drivers/char/agp/alpha-agp.c
+++ b/drivers/char/agp/alpha-agp.c
@@ -80,7 +80,7 @@ static void alpha_core_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 	agp->mode.bits.enable = 1;
 	agp->ops->configure(agp);
 
-	agp_device_command(agp->mode.lw, 0);
+	agp_device_command(agp->mode.lw, false);
 }
 
 static int alpha_core_agp_insert_memory(struct agp_memory *mem, off_t pg_start,
@@ -126,7 +126,7 @@ struct agp_bridge_driver alpha_core_agp_driver = {
 	.aperture_sizes		= alpha_core_agp_sizes,
 	.num_aperture_sizes	= 1,
 	.size_type		= FIXED_APER_SIZE,
-	.cant_use_aperture	= 1,
+	.cant_use_aperture	= true,
 	.masks			= NULL,
 
 	.fetch_size		= alpha_core_agp_fetch_size,
diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c
index 96bdb9296b07..39a0718bc616 100644
--- a/drivers/char/agp/amd-k7-agp.c
+++ b/drivers/char/agp/amd-k7-agp.c
@@ -314,9 +314,9 @@ static int amd_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index d8200ac8f8cb..13665db363d6 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -90,9 +90,9 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index 07b4d8ff56e5..3a4566c0d84f 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -287,10 +287,10 @@ static int ati_insert_memory(struct agp_memory * mem,
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		/*CACHE_FLUSH(); */
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index cac0009cebc1..8ca6f262ef85 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -249,9 +249,9 @@ static int efficeon_insert_memory(struct agp_memory * mem, off_t pg_start, int t
 	if (type != 0 || mem->type != 0)
 		return -EINVAL;
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	last_page = NULL;
@@ -329,7 +329,7 @@ static const struct agp_bridge_driver efficeon_driver = {
 	.free_gatt_table	= efficeon_free_gatt_table,
 	.insert_memory		= efficeon_insert_memory,
 	.remove_memory		= efficeon_remove_memory,
-	.cant_use_aperture	= 0,	// 1 might be faster?
+	.cant_use_aperture	= false,	// true might be faster?
 
 	// Generic
 	.alloc_by_type		= agp_generic_alloc_by_type,
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index b6650a63197d..3e3625affdd1 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -188,7 +188,7 @@ void agp_free_memory(struct agp_memory *curr)
 	if (curr == NULL)
 		return;
 
-	if (curr->is_bound == TRUE)
+	if (curr->is_bound)
 		agp_unbind_memory(curr);
 
 	if (curr->type >= AGP_USER_TYPES) {
@@ -414,20 +414,20 @@ int agp_bind_memory(struct agp_memory *curr, off_t pg_start)
 	if (curr == NULL)
 		return -EINVAL;
 
-	if (curr->is_bound == TRUE) {
+	if (curr->is_bound) {
 		printk(KERN_INFO PFX "memory %p is already bound!\n", curr);
 		return -EINVAL;
 	}
-	if (curr->is_flushed == FALSE) {
+	if (!curr->is_flushed) {
 		curr->bridge->driver->cache_flush();
-		curr->is_flushed = TRUE;
+		curr->is_flushed = true;
 	}
 	ret_val = curr->bridge->driver->insert_memory(curr, pg_start, curr->type);
 
 	if (ret_val != 0)
 		return ret_val;
 
-	curr->is_bound = TRUE;
+	curr->is_bound = true;
 	curr->pg_start = pg_start;
 	return 0;
 }
@@ -449,7 +449,7 @@ int agp_unbind_memory(struct agp_memory *curr)
 	if (curr == NULL)
 		return -EINVAL;
 
-	if (curr->is_bound != TRUE) {
+	if (!curr->is_bound) {
 		printk(KERN_INFO PFX "memory %p was not bound!\n", curr);
 		return -EINVAL;
 	}
@@ -459,7 +459,7 @@ int agp_unbind_memory(struct agp_memory *curr)
 	if (ret_val != 0)
 		return ret_val;
 
-	curr->is_bound = FALSE;
+	curr->is_bound = false;
 	curr->pg_start = 0;
 	return 0;
 }
@@ -757,7 +757,7 @@ u32 agp_collect_device_status(struct agp_bridge_data *bridge, u32 requested_mode
 EXPORT_SYMBOL(agp_collect_device_status);
 
 
-void agp_device_command(u32 bridge_agpstat, int agp_v3)
+void agp_device_command(u32 bridge_agpstat, bool agp_v3)
 {
 	struct pci_dev *device = NULL;
 	int mode;
@@ -821,7 +821,7 @@ void agp_generic_enable(struct agp_bridge_data *bridge, u32 requested_mode)
 			/* If we have 3.5, we can do the isoch stuff. */
 			if (bridge->minor_version >= 5)
 				agp_3_5_enable(bridge);
-			agp_device_command(bridge_agpstat, TRUE);
+			agp_device_command(bridge_agpstat, true);
 			return;
 		} else {
 		    /* Disable calibration cycle in RX91<1> when not in AGP3.0 mode of operation.*/
@@ -838,7 +838,7 @@ void agp_generic_enable(struct agp_bridge_data *bridge, u32 requested_mode)
 	}
 
 	/* AGP v<3 */
-	agp_device_command(bridge_agpstat, FALSE);
+	agp_device_command(bridge_agpstat, false);
 }
 EXPORT_SYMBOL(agp_generic_enable);
 
@@ -1086,9 +1086,9 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type)
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		bridge->driver->cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c
index cbb0444467ba..80d7317f85c9 100644
--- a/drivers/char/agp/hp-agp.c
+++ b/drivers/char/agp/hp-agp.c
@@ -353,9 +353,9 @@ hp_zx1_insert_memory (struct agp_memory *mem, off_t pg_start, int type)
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = io_pg_start; i < mem->page_count; i++) {
@@ -437,7 +437,7 @@ const struct agp_bridge_driver hp_zx1_driver = {
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture	= 1,
+	.cant_use_aperture	= true,
 };
 
 static int __init
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c
index 76f581c85a7d..e587eebebc67 100644
--- a/drivers/char/agp/i460-agp.c
+++ b/drivers/char/agp/i460-agp.c
@@ -580,7 +580,7 @@ const struct agp_bridge_driver intel_i460_driver = {
 	.alloc_by_type		= agp_generic_alloc_by_type,
 	.free_by_type		= agp_generic_free_by_type,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture	= 1,
+	.cant_use_aperture	= true,
 };
 
 static int __devinit agp_intel_i460_probe(struct pci_dev *pdev,
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 01b03402ea92..6800b7e5b6f5 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -1658,7 +1658,7 @@ static const struct agp_bridge_driver intel_810_driver = {
 	.aperture_sizes		= intel_i810_sizes,
 	.size_type		= FIXED_APER_SIZE,
 	.num_aperture_sizes	= 2,
-	.needs_scratch_page	= TRUE,
+	.needs_scratch_page	= true,
 	.configure		= intel_i810_configure,
 	.fetch_size		= intel_i810_fetch_size,
 	.cleanup		= intel_i810_cleanup,
@@ -1707,7 +1707,7 @@ static const struct agp_bridge_driver intel_830_driver = {
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
 	.num_aperture_sizes	= 4,
-	.needs_scratch_page	= TRUE,
+	.needs_scratch_page	= true,
 	.configure		= intel_i830_configure,
 	.fetch_size		= intel_i830_fetch_size,
 	.cleanup		= intel_i830_cleanup,
@@ -1878,7 +1878,7 @@ static const struct agp_bridge_driver intel_915_driver = {
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
 	.num_aperture_sizes	= 4,
-	.needs_scratch_page	= TRUE,
+	.needs_scratch_page	= true,
 	.configure		= intel_i915_configure,
 	.fetch_size		= intel_i9xx_fetch_size,
 	.cleanup		= intel_i915_cleanup,
@@ -1904,7 +1904,7 @@ static const struct agp_bridge_driver intel_i965_driver = {
        .aperture_sizes         = intel_i830_sizes,
        .size_type              = FIXED_APER_SIZE,
        .num_aperture_sizes     = 4,
-       .needs_scratch_page     = TRUE,
+       .needs_scratch_page     = true,
        .configure              = intel_i915_configure,
        .fetch_size             = intel_i9xx_fetch_size,
        .cleanup                = intel_i915_cleanup,
@@ -1954,7 +1954,7 @@ static const struct agp_bridge_driver intel_g33_driver = {
 	.aperture_sizes         = intel_i830_sizes,
 	.size_type              = FIXED_APER_SIZE,
 	.num_aperture_sizes     = 4,
-	.needs_scratch_page     = TRUE,
+	.needs_scratch_page     = true,
 	.configure              = intel_i915_configure,
 	.fetch_size             = intel_i9xx_fetch_size,
 	.cleanup                = intel_i915_cleanup,
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 225ed2a53d45..eaceb61ba2dc 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -214,9 +214,9 @@ static int nvidia_insert_memory(struct agp_memory *mem, off_t pg_start, int type
 			return -EBUSY;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index 2939e3570f9d..8c42dcc5958c 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -141,9 +141,9 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = io_pg_start; i < mem->page_count; i++) {
@@ -226,7 +226,7 @@ static const struct agp_bridge_driver parisc_agp_driver = {
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture	= 1,
+	.cant_use_aperture	= true,
 };
 
 static int __init
diff --git a/drivers/char/agp/sgi-agp.c b/drivers/char/agp/sgi-agp.c
index 98cf8abb3e57..b972d83bb1b2 100644
--- a/drivers/char/agp/sgi-agp.c
+++ b/drivers/char/agp/sgi-agp.c
@@ -182,9 +182,9 @@ static int sgi_tioca_insert_memory(struct agp_memory *mem, off_t pg_start,
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		bridge->driver->cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
@@ -264,8 +264,8 @@ const struct agp_bridge_driver sgi_tioca_driver = {
 	.agp_alloc_page = sgi_tioca_alloc_page,
 	.agp_destroy_page = agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture = 1,
-	.needs_scratch_page = 0,
+	.cant_use_aperture = true,
+	.needs_scratch_page = false,
 	.num_aperture_sizes = 1,
 };
 
diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c
index e08934e58f32..0e054c134490 100644
--- a/drivers/char/agp/sworks-agp.c
+++ b/drivers/char/agp/sworks-agp.c
@@ -339,9 +339,9 @@ static int serverworks_insert_memory(struct agp_memory *mem,
 		j++;
 	}
 
-	if (mem->is_flushed == FALSE) {
+	if (!mem->is_flushed) {
 		global_cache_flush();
-		mem->is_flushed = TRUE;
+		mem->is_flushed = true;
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
@@ -412,7 +412,7 @@ static void serverworks_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 			       bridge->capndx + PCI_AGP_COMMAND,
 			       command);
 
-	agp_device_command(command, 0);
+	agp_device_command(command, false);
 }
 
 static const struct agp_bridge_driver sworks_driver = {
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 42c0a600b1ac..d2fa3cfca02a 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -281,10 +281,10 @@ static void uninorth_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 
 	if (uninorth_rev >= 0x30) {
 		/* This is an AGP V3 */
-		agp_device_command(command, (status & AGPSTAT_MODE_3_0));
+		agp_device_command(command, (status & AGPSTAT_MODE_3_0) != 0);
 	} else {
 		/* AGP V2 */
-		agp_device_command(command, 0);
+		agp_device_command(command, false);
 	}
 
 	uninorth_tlbflush(NULL);
@@ -511,7 +511,7 @@ const struct agp_bridge_driver uninorth_agp_driver = {
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture	= 1,
+	.cant_use_aperture	= true,
 };
 
 const struct agp_bridge_driver u3_agp_driver = {
@@ -536,8 +536,8 @@ const struct agp_bridge_driver u3_agp_driver = {
 	.agp_alloc_page		= agp_generic_alloc_page,
 	.agp_destroy_page	= agp_generic_destroy_page,
 	.agp_type_to_mask_type  = agp_generic_type_to_mask_type,
-	.cant_use_aperture	= 1,
-	.needs_scratch_page	= 1,
+	.cant_use_aperture	= true,
+	.needs_scratch_page	= true,
 };
 
 static struct agp_device_ids uninorth_agp_device_ids[] __devinitdata = {
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index 661d90d6cf7c..09b4478ffacd 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -30,14 +30,6 @@
 #ifndef _AGP_BACKEND_H
 #define _AGP_BACKEND_H 1
 
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
 enum chipset_type {
 	NOT_SUPPORTED,
 	SUPPORTED,
@@ -57,7 +49,7 @@ struct agp_kern_info {
 	size_t aper_size;
 	int max_memory;		/* In pages */
 	int current_memory;
-	int cant_use_aperture;
+	bool cant_use_aperture;
 	unsigned long page_mask;
 	struct vm_operations_struct *vm_ops;
 };
@@ -83,8 +75,8 @@ struct agp_memory {
 	off_t pg_start;
 	u32 type;
 	u32 physical;
-	u8 is_bound;
-	u8 is_flushed;
+	bool is_bound;
+	bool is_flushed;
         u8 vmalloc_flag;
 };
 
-- 
cgit v1.2.3


From 9516b030b484fc99cf24213caf88df01f99248dd Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 19 Jun 2008 10:42:17 +1000
Subject: agp: more boolean conversions.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/compat_ioctl.c |  2 +-
 drivers/char/agp/frontend.c     | 12 ++++++------
 drivers/char/agp/generic.c      |  4 ++--
 drivers/char/agp/intel-agp.c    |  6 +++---
 include/linux/agp_backend.h     |  2 +-
 include/linux/agpgart.h         |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c
index 39275794fe63..58c57cb2518c 100644
--- a/drivers/char/agp/compat_ioctl.c
+++ b/drivers/char/agp/compat_ioctl.c
@@ -214,7 +214,7 @@ long compat_agp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		ret_val = -EINVAL;
 		goto ioctl_out;
 	}
-	if ((agp_fe.backend_acquired != TRUE) &&
+	if ((agp_fe.backend_acquired != true) &&
 	    (cmd != AGPIOC_ACQUIRE32)) {
 		ret_val = -EBUSY;
 		goto ioctl_out;
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 857b26227d87..e6cb1ab03e06 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -395,7 +395,7 @@ static int agp_remove_controller(struct agp_controller *controller)
 
 	if (agp_fe.current_controller == controller) {
 		agp_fe.current_controller = NULL;
-		agp_fe.backend_acquired = FALSE;
+		agp_fe.backend_acquired = false;
 		agp_backend_release(agp_bridge);
 	}
 	kfree(controller);
@@ -443,7 +443,7 @@ static void agp_controller_release_current(struct agp_controller *controller,
 	}
 
 	agp_fe.current_controller = NULL;
-	agp_fe.used_by_controller = FALSE;
+	agp_fe.used_by_controller = false;
 	agp_backend_release(agp_bridge);
 }
 
@@ -573,7 +573,7 @@ static int agp_mmap(struct file *file, struct vm_area_struct *vma)
 
 	mutex_lock(&(agp_fe.agp_mutex));
 
-	if (agp_fe.backend_acquired != TRUE)
+	if (agp_fe.backend_acquired != true)
 		goto out_eperm;
 
 	if (!(test_bit(AGP_FF_IS_VALID, &priv->access_flags)))
@@ -768,7 +768,7 @@ int agpioc_acquire_wrap(struct agp_file_private *priv)
 
 	atomic_inc(&agp_bridge->agp_in_use);
 
-	agp_fe.backend_acquired = TRUE;
+	agp_fe.backend_acquired = true;
 
 	controller = agp_find_controller_by_pid(priv->my_pid);
 
@@ -778,7 +778,7 @@ int agpioc_acquire_wrap(struct agp_file_private *priv)
 		controller = agp_create_controller(priv->my_pid);
 
 		if (controller == NULL) {
-			agp_fe.backend_acquired = FALSE;
+			agp_fe.backend_acquired = false;
 			agp_backend_release(agp_bridge);
 			return -ENOMEM;
 		}
@@ -981,7 +981,7 @@ static long agp_ioctl(struct file *file,
 		ret_val = -EINVAL;
 		goto ioctl_out;
 	}
-	if ((agp_fe.backend_acquired != TRUE) &&
+	if ((agp_fe.backend_acquired != true) &&
 	    (cmd != AGPIOC_ACQUIRE)) {
 		ret_val = -EBUSY;
 		goto ioctl_out;
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 3e3625affdd1..564daaa6c7d0 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -96,13 +96,13 @@ EXPORT_SYMBOL(agp_flush_chipset);
 void agp_alloc_page_array(size_t size, struct agp_memory *mem)
 {
 	mem->memory = NULL;
-	mem->vmalloc_flag = 0;
+	mem->vmalloc_flag = false;
 
 	if (size <= 2*PAGE_SIZE)
 		mem->memory = kmalloc(size, GFP_KERNEL | __GFP_NORETRY);
 	if (mem->memory == NULL) {
 		mem->memory = vmalloc(size);
-		mem->vmalloc_flag = 1;
+		mem->vmalloc_flag = true;
 	}
 }
 EXPORT_SYMBOL(agp_alloc_page_array);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 6800b7e5b6f5..02356595ac1c 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -325,7 +325,7 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start,
 out:
 	ret = 0;
 out_err:
-	mem->is_flushed = 1;
+	mem->is_flushed = true;
 	return ret;
 }
 
@@ -795,7 +795,7 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start,
 out:
 	ret = 0;
 out_err:
-	mem->is_flushed = 1;
+	mem->is_flushed = true;
 	return ret;
 }
 
@@ -1022,7 +1022,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start,
  out:
 	ret = 0;
  out_err:
-	mem->is_flushed = 1;
+	mem->is_flushed = true;
 	return ret;
 }
 
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index 09b4478ffacd..972b12bcfb36 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -77,7 +77,7 @@ struct agp_memory {
 	u32 physical;
 	bool is_bound;
 	bool is_flushed;
-        u8 vmalloc_flag;
+        bool vmalloc_flag;
 };
 
 #define AGP_NORMAL_MEMORY 0
diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h
index 62aef589eb94..c8fdb6e658e1 100644
--- a/include/linux/agpgart.h
+++ b/include/linux/agpgart.h
@@ -206,8 +206,8 @@ struct agp_front_data {
 	struct agp_controller *current_controller;
 	struct agp_controller *controllers;
 	struct agp_file_private *file_priv_list;
-	u8 used_by_controller;
-	u8 backend_acquired;
+	bool used_by_controller;
+	bool backend_acquired;
 };
 
 #endif				/* __KERNEL__ */
-- 
cgit v1.2.3


From 71c2742f5e6348d76ee62085cf0a13e5eff0f00e Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Sat, 21 Jun 2008 19:01:02 +0200
Subject: Add return value to reserve_bootmem_node()

This patch changes the function reserve_bootmem_node() from void to int,
returning -ENOMEM if the allocation fails.

This fixes a build problem on x86 with CONFIG_KEXEC=y and
CONFIG_NEED_MULTIPLE_NODES=y

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Reported-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 2 +-
 mm/bootmem.c            | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6a5dbdc8a7dc..686895bacd9d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -94,7 +94,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long freepfn,
 				       unsigned long startpfn,
 				       unsigned long endpfn);
-extern void reserve_bootmem_node(pg_data_t *pgdat,
+extern int reserve_bootmem_node(pg_data_t *pgdat,
 				 unsigned long physaddr,
 				 unsigned long size,
 				 int flags);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
 	int ret;
 
 	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 	if (ret < 0)
-		return;
+		return -ENOMEM;
 	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+	return 0;
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-- 
cgit v1.2.3


From 20d4fdc1a788e4ca0aaf2422772ba668e7e10839 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 9 Jun 2008 16:40:36 -0700
Subject: [patch 2/4] fs: make struct file arg to d_path const

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 2 +-
 include/linux/dcache.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 3ee588d5f585..c4c9072d810c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1847,7 +1847,7 @@ Elong:
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  */
-char *d_path(struct path *path, char *buf, int buflen)
+char *d_path(const struct path *path, char *buf, int buflen)
 {
 	char *res;
 	struct path root;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2a6639407c80..d982eb89c77d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -300,7 +300,7 @@ extern int d_validate(struct dentry *, struct dentry *);
 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 
 extern char *__d_path(const struct path *path, struct path *root, char *, int);
-extern char *d_path(struct path *, char *, int);
+extern char *d_path(const struct path *, char *, int);
 extern char *dentry_path(struct dentry *, char *, int);
 
 /* Allocation counts.. */
-- 
cgit v1.2.3


From f9f48ec72bfc9489a30bc6ddbfcf27d86a8bc651 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 9 Jun 2008 16:40:38 -0700
Subject: [patch 4/4] flock: remove unused fields from file_lock_operations

fl_insert and fl_remove are not used right now in the kernel. Remove them.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c         | 6 ------
 include/linux/fs.h | 2 --
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index 11dbf08651b7..dce8c747371c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
 	/* insert into file's list */
 	fl->fl_next = *pos;
 	*pos = fl;
-
-	if (fl->fl_ops && fl->fl_ops->fl_insert)
-		fl->fl_ops->fl_insert(fl);
 }
 
 /*
@@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
 		fl->fl_fasync = NULL;
 	}
 
-	if (fl->fl_ops && fl->fl_ops->fl_remove)
-		fl->fl_ops->fl_remove(fl);
-
 	if (fl->fl_nspid) {
 		put_pid(fl->fl_nspid);
 		fl->fl_nspid = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d490779f18d9..7c1080826832 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -894,8 +894,6 @@ static inline int file_check_writeable(struct file *filp)
 typedef struct files_struct *fl_owner_t;
 
 struct file_lock_operations {
-	void (*fl_insert)(struct file_lock *);	/* lock insertion callback */
-	void (*fl_remove)(struct file_lock *);	/* lock removal callback */
 	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
 	void (*fl_release_private)(struct file_lock *);
 };
-- 
cgit v1.2.3


From 36c7343b4ecac2432430f5393314f1bdc2c219a5 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 23 Jun 2008 12:06:52 +0100
Subject: tty_driver: Update required method documentation

Some of the requirement rules are now more relaxed. Also correct a
contradiction in the previous update

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty_driver.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 59f1c0bd8f9c..d2a003586761 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -27,8 +27,7 @@
  * 	This routine is called by the kernel to write a series of
  * 	characters to the tty device.  The characters may come from
  * 	user space or kernel space.  This routine will return the
- *	number of characters actually accepted for writing.  This
- *	routine is mandatory.
+ *	number of characters actually accepted for writing.
  *
  *	Optional: Required for writable devices.
  *
@@ -134,7 +133,7 @@
  * 	This routine notifies the tty driver that it should hangup the
  * 	tty device.
  *
- *	Required:
+ *	Optional:
  *
  * void (*break_ctl)(struct tty_stuct *tty, int state);
  *
-- 
cgit v1.2.3


From 06e05645661211b9eaadaf6344c335d2e80f0ba2 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 6 Jun 2008 16:37:36 -0300
Subject: KVM: close timer injection race window in __vcpu_run

If a timer fires after kvm_inject_pending_timer_irqs() but before
local_irq_disable() the code will enter guest mode and only inject such
timer interrupt the next time an unrelated event causes an exit.

It would be simpler if the timer->pending irq conversion could be done
with IRQ's disabled, so that the above problem cannot happen.

For now introduce a new vcpu requests bit to cancel guest entry.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c     | 9 ++++++---
 arch/x86/kvm/lapic.c     | 1 +
 arch/x86/kvm/x86.c       | 1 +
 include/linux/kvm_host.h | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f2f5d260874e..3829aa7b663f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	atomic_inc(&pt->pending);
 	smp_mb__after_atomic_inc();
-	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		wake_up_interruptible(&vcpu0->wq);
+	if (vcpu0) {
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+		if (waitqueue_active(&vcpu0->wq)) {
+			vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+			wake_up_interruptible(&vcpu0->wq);
+		}
 	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c297c50eba63..ebc03f5ae162 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
 	atomic_inc(&apic->timer.pending);
+	set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
 	if (waitqueue_active(q)) {
 		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b90744a1dc3a..b08812d6b34c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2774,6 +2774,7 @@ again:
 		}
 	}
 
+	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
 	kvm_inject_pending_timer_irqs(vcpu);
 
 	preempt_disable();
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 092b1b25291d..de9d1df4bba2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,7 @@
 #define KVM_REQ_REPORT_TPR_ACCESS  2
 #define KVM_REQ_MMU_RELOAD         3
 #define KVM_REQ_TRIPLE_FAULT       4
+#define KVM_REQ_PENDING_TIMER      5
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
-- 
cgit v1.2.3


From d8de72473effd674a3c1fe9621821f406f5587c9 Mon Sep 17 00:00:00 2001
From: Peng Haitao <penght@cn.fujitsu.com>
Date: Tue, 20 May 2008 09:13:02 +0800
Subject: [PATCH] remove useless argument type in audit_filter_user()

The second argument "type" is not used in audit_filter_user(), so I think that type can be removed. If I'm wrong, please tell me.

Signed-off-by: Peng Haitao <penght@cn.fujitsu.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 2 +-
 kernel/audit.c        | 2 +-
 kernel/auditfilter.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 63c3bb98558f..8b82974bdc12 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -571,7 +571,7 @@ extern void		    audit_log_lost(const char *message);
 extern int		    audit_update_lsm_rules(void);
 
 				/* Private API (for audit.c only) */
-extern int audit_filter_user(struct netlink_skb_parms *cb, int type);
+extern int audit_filter_user(struct netlink_skb_parms *cb);
 extern int audit_filter_type(int type);
 extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 				void *data, size_t datasz, uid_t loginuid,
diff --git a/kernel/audit.c b/kernel/audit.c
index 56f30287e24c..e092f1c0ce30 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -738,7 +738,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (!audit_enabled && msg_type != AUDIT_USER_AVC)
 			return 0;
 
-		err = audit_filter_user(&NETLINK_CB(skb), msg_type);
+		err = audit_filter_user(&NETLINK_CB(skb));
 		if (err == 1) {
 			err = 0;
 			if (msg_type == AUDIT_USER_TTY) {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 75cdf262851d..98c50cc671bb 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1721,7 +1721,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 	return 1;
 }
 
-int audit_filter_user(struct netlink_skb_parms *cb, int type)
+int audit_filter_user(struct netlink_skb_parms *cb)
 {
 	enum audit_state state = AUDIT_DISABLED;
 	struct audit_entry *e;
-- 
cgit v1.2.3


From 16d752397301b95abaa95cbaf9e785d221872311 Mon Sep 17 00:00:00 2001
From: Rene Herman <rene.herman@keyaccess.nl>
Date: Tue, 24 Jun 2008 19:38:56 +0200
Subject: thermal: Create CONFIG_THERMAL_HWMON=n

A bug in libsensors <= 2.10.6 is exposed
when this new hwmon I/F is enabled.
Create CONFIG_THERMAL_HWMON=n
until some time after libsensors 2.10.7 ships
so those users can run the latest kernel.

libsensors 3.x is already fixed -- those users
can use CONFIG_THERMAL_HWMON=y now.

Signed-off-by: Rene Herman <rene.herman@gmail.com>
Acked-by: Mark M. Hoffman <mhoffman@lightlink.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/feature-removal-schedule.txt | 9 +++++++++
 drivers/thermal/Kconfig                    | 9 +++++++++
 drivers/thermal/thermal_sys.c              | 4 ++--
 include/linux/thermal.h                    | 6 ++----
 4 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5b3f31faed56..46ece3fba6f9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -312,3 +312,12 @@ When:	2.6.26
 Why:	Implementation became generic; users should now include
 	linux/semaphore.h instead.
 Who:	Matthew Wilcox <willy@linux.intel.com>
+
+---------------------------
+
+What:	CONFIG_THERMAL_HWMON
+When:	January 2009
+Why:	This option was introduced just to allow older lm-sensors userspace
+	to keep working over the upgrade to 2.6.26. At the scheduled time of
+	removal fixed lm-sensors (2.x or 3.x) should be readily available.
+Who:	Rene Herman <rene.herman@gmail.com>
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 4b628526df09..a86e952ed4ca 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -12,3 +12,12 @@ menuconfig THERMAL
 	  cooling devices.
 	  All platforms with ACPI thermal support can use this driver.
 	  If you want this support, you should say Y or M here.
+
+config THERMAL_HWMON
+	bool "Hardware monitoring support"
+	depends on HWMON=y || HWMON=THERMAL
+	help
+	  The generic thermal sysfs driver's hardware monitoring support
+	  requires a 2.10.7/3.0.2 or later lm-sensors userspace.
+
+	  Say Y if your user-space is new enough.
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 6098787341f3..fe07462d5947 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -295,8 +295,8 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 
 /* Device management */
 
-#if defined(CONFIG_HWMON) ||	\
-	(defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE))
+#if defined(CONFIG_THERMAL_HWMON)
+
 /* hwmon sys I/F */
 #include <linux/hwmon.h>
 static LIST_HEAD(thermal_hwmon_list);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 06d3e6eb9ca8..917707e6151d 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -66,8 +66,7 @@ struct thermal_cooling_device {
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
 #define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
 
-#if defined(CONFIG_HWMON) ||	\
-	(defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE))
+#if defined(CONFIG_THERMAL_HWMON)
 /* thermal zone devices with the same type share one hwmon device */
 struct thermal_hwmon_device {
 	char type[THERMAL_NAME_LENGTH];
@@ -94,8 +93,7 @@ struct thermal_zone_device {
 	struct idr idr;
 	struct mutex lock;	/* protect cooling devices list */
 	struct list_head node;
-#if defined(CONFIG_HWMON) ||	\
-	(defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE))
+#if defined(CONFIG_THERMAL_HWMON)
 	struct list_head hwmon_node;
 	struct thermal_hwmon_device *hwmon;
 	struct thermal_hwmon_attr temp_input;	/* hwmon sys attr */
-- 
cgit v1.2.3


From b660398101cd0622325480a67ac88bb4d33d553a Mon Sep 17 00:00:00 2001
From: David Woodhouse <david.woodhouse@intel.com>
Date: Fri, 27 Jun 2008 14:39:42 +0100
Subject: kbuild: fix a.out.h export to userspace with O= build.

We need to check for existence of the a.out.h header in the source tree,
not the object tree, if we want it to get the right answer with O=.

Signed-off-by: David Woodhouse <david.woodhouse@intel.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/asm-generic/Kbuild.asm | 2 +-
 include/asm-powerpc/Kbuild     | 1 -
 include/linux/Kbuild           | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 92a6d91d0c1a..7cd25b8e7c9a 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,6 +1,6 @@
 header-y  += kvm.h
 
-ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h)
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h
 endif
 unifdef-y += auxvec.h
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index 7381916dfcbb..bca352e033c3 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -1,6 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += a.out.h
 header-y += auxvec.h
 header-y += ioctls.h
 header-y += mman.h
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b6fbb2573e88..71d70d1fbce2 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -166,7 +166,7 @@ unifdef-y += acct.h
 unifdef-y += adb.h
 unifdef-y += adfs_fs.h
 unifdef-y += agpgart.h
-ifeq ($(wildcard include/asm-$(SRCARCH)/a.out.h),include/asm-$(SRCARCH)/a.out.h)
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
 unifdef-y += a.out.h
 endif
 unifdef-y += apm_bios.h
-- 
cgit v1.2.3


From 4bbff7e408a54cce88d26191191e8bcda2a60d55 Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Thu, 26 Jun 2008 09:13:48 -0400
Subject: Input: add KEY_MEDIA_REPEAT definition

This patch adds the Repeat key to the input layer. The usage
in the HUT is 0xBC (listed under "15.7 Transport Controls").

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index e075c4b762fb..d150c57e5f0a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -534,8 +534,8 @@ struct input_absinfo {
 
 #define KEY_FRAMEBACK		0x1b4	/* Consumer - transport controls */
 #define KEY_FRAMEFORWARD	0x1b5
-
 #define KEY_CONTEXT_MENU	0x1b6	/* GenDesc - system context menu */
+#define KEY_MEDIA_REPEAT	0x1b7	/* Consumer - transport control */
 
 #define KEY_DEL_EOL		0x1c0
 #define KEY_DEL_EOS		0x1c1
-- 
cgit v1.2.3


From ab96dddbedf4bb8a7a0fe44012efc1d99598c36f Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:54:04 -0500
Subject: svcrdma: Add a type for keeping NFS RPC mapping

Create a new data structure to hold the remote client address space
to local server address space mapping.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 27 +++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma.c           | 19 +++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 26 ++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 05eb4664d0dd..bd8749cc8084 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -86,6 +86,31 @@ struct svc_rdma_op_ctxt {
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
+/*
+ * NFS_ requests are mapped on the client side by the chunk lists in
+ * the RPCRDMA header. During the fetching of the RPC from the client
+ * and the writing of the reply to the client, the memory in the
+ * client and the memory in the server must be mapped as contiguous
+ * vaddr/len for access by the hardware. These data strucures keep
+ * these mappings.
+ *
+ * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
+ * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
+ * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
+ * mapping of the reply.
+ */
+struct svc_rdma_chunk_sge {
+	int start;		/* sge no for this chunk */
+	int count;		/* sge count for this chunk */
+};
+struct svc_rdma_req_map {
+	unsigned long count;
+	union {
+		struct kvec sge[RPCSVC_MAXPAGES];
+		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+	};
+};
+
 #define RDMACTXT_F_LAST_CTXT	2
 
 struct svcxprt_rdma {
@@ -173,6 +198,8 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
+extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 88c0ca20bb1e..171f2053e90c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,6 +69,9 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
+/* Temporary NFS request map cache */
+struct kmem_cache *svc_rdma_map_cachep;
+
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -241,6 +244,7 @@ void svc_rdma_cleanup(void)
 		svcrdma_table_header = NULL;
 	}
 	svc_unreg_xprt_class(&svc_rdma_class);
+	kmem_cache_destroy(svc_rdma_map_cachep);
 }
 
 int svc_rdma_init(void)
@@ -255,9 +259,24 @@ int svc_rdma_init(void)
 		svcrdma_table_header =
 			register_sysctl_table(svcrdma_root_table);
 
+	/* Create the temporary map cache */
+	svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
+						sizeof(struct svc_rdma_req_map),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL);
+	if (!svc_rdma_map_cachep) {
+		printk(KERN_INFO "Could not allocate map cache.\n");
+		goto err;
+	}
+
 	/* Register RDMA with the SVC transport switch */
 	svc_reg_xprt_class(&svc_rdma_class);
 	return 0;
+
+ err:
+	unregister_sysctl_table(svcrdma_table_header);
+	return -ENOMEM;
 }
 MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
 MODULE_DESCRIPTION("SVC RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e132509d1db0..ae90758d8e9b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -173,6 +173,32 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 	atomic_dec(&xprt->sc_ctxt_used);
 }
 
+/* Temporary NFS request map cache. Created in svc_rdma.c  */
+extern struct kmem_cache *svc_rdma_map_cachep;
+
+/*
+ * Temporary NFS req mappings are shared across all transport
+ * instances. These are short lived and should be bounded by the number
+ * of concurrent server threads * depth of the SQ.
+ */
+struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+{
+	struct svc_rdma_req_map *map;
+	while (1) {
+		map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
+		if (map)
+			break;
+		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+	}
+	map->count = 0;
+	return map;
+}
+
+void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+{
+	kmem_cache_free(svc_rdma_map_cachep, map);
+}
+
 /* ib_cq event handler */
 static void cq_event_handler(struct ib_event *event, void *context)
 {
-- 
cgit v1.2.3


From f820c57ebf5493d4602cc00577c8b0fadd27a7b8 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Tue, 27 May 2008 17:03:14 -0500
Subject: svcrdma: Use reply and chunk map for RDMA_READ processing

Modify the RDMA_READ processing to use the reply and chunk list mapping data
types. Also add a special purpose 'hdr_count' field in in the context to hold
the header page count instead of overloading the SGE length field and
corrupting the DMA map length.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h         |  1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 83 +++++++++++++++------------------
 2 files changed, 39 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index bd8749cc8084..fd5e8a1c17de 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
+	int hdr_count;
 	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 06ab4841537b..d25971b42a74 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-struct chunk_sge {
-	int start;		/* sge no for this chunk */
-	int count;		/* sge count for this chunk */
-};
-
 /* Encode a read-chunk-list as an array of IB SGE
  *
  * Assumptions:
@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 			   struct svc_rqst *rqstp,
 			   struct svc_rdma_op_ctxt *head,
 			   struct rpcrdma_msg *rmsgp,
-			   struct ib_sge *sge,
-			   struct chunk_sge *ch_sge_ary,
+			   struct svc_rdma_req_map *rpl_map,
+			   struct svc_rdma_req_map *chl_map,
 			   int ch_count,
 			   int byte_count)
 {
@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	head->arg.head[0] = rqstp->rq_arg.head[0];
 	head->arg.tail[0] = rqstp->rq_arg.tail[0];
 	head->arg.pages = &head->pages[head->count];
-	head->sge[0].length = head->count; /* save count of hdr pages */
+	head->hdr_count = head->count; /* save count of hdr pages */
 	head->arg.page_base = 0;
 	head->arg.page_len = ch_bytes;
 	head->arg.len = rqstp->rq_arg.len + ch_bytes;
 	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
 	head->count++;
-	ch_sge_ary[0].start = 0;
+	chl_map->ch[0].start = 0;
 	while (byte_count) {
+		rpl_map->sge[sge_no].iov_base =
+			page_address(rqstp->rq_arg.pages[page_no]) + page_off;
 		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
-		sge[sge_no].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					rqstp->rq_arg.pages[page_no],
-					page_off, sge_bytes,
-					DMA_FROM_DEVICE);
-		sge[sge_no].length = sge_bytes;
-		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+		rpl_map->sge[sge_no].iov_len = sge_bytes;
 		/*
 		 * Don't bump head->count here because the same page
 		 * may be used by multiple SGE.
@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 		 * SGE, move to the next SGE
 		 */
 		if (ch_bytes == 0) {
-			ch_sge_ary[ch_no].count =
-				sge_no - ch_sge_ary[ch_no].start;
+			chl_map->ch[ch_no].count =
+				sge_no - chl_map->ch[ch_no].start;
 			ch_no++;
 			ch++;
-			ch_sge_ary[ch_no].start = sge_no;
+			chl_map->ch[ch_no].start = sge_no;
 			ch_bytes = ch->rc_target.rs_length;
 			/* If bytes remaining account for next chunk */
 			if (byte_count) {
@@ -220,18 +211,24 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
 	return sge_no;
 }
 
-static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt,
-			      struct ib_sge *sge,
+static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+			      struct svc_rdma_op_ctxt *ctxt,
+			      struct kvec *vec,
 			      u64 *sgl_offset,
 			      int count)
 {
 	int i;
 
 	ctxt->count = count;
+	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
-		ctxt->sge[i].addr = sge[i].addr;
-		ctxt->sge[i].length = sge[i].length;
-		*sgl_offset = *sgl_offset + sge[i].length;
+		ctxt->sge[i].addr =
+			ib_dma_map_single(xprt->sc_cm_id->device,
+					  vec[i].iov_base, vec[i].iov_len,
+					  DMA_FROM_DEVICE);
+		ctxt->sge[i].length = vec[i].iov_len;
+		ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
+		*sgl_offset = *sgl_offset + vec[i].iov_len;
 	}
 }
 
@@ -282,34 +279,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	struct ib_send_wr read_wr;
 	int err = 0;
 	int ch_no;
-	struct ib_sge *sge;
 	int ch_count;
 	int byte_count;
 	int sge_count;
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
-	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
-	struct chunk_sge *ch_sge_ary;
+	struct svc_rdma_req_map *rpl_map;
+	struct svc_rdma_req_map *chl_map;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
 	if (!ch)
 		return 0;
 
-	/* Allocate temporary contexts to keep SGE */
-	BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
-	tmp_sge_ctxt = svc_rdma_get_context(xprt);
-	sge = tmp_sge_ctxt->sge;
-	tmp_ch_ctxt = svc_rdma_get_context(xprt);
-	ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
+	/* Allocate temporary reply and chunk maps */
+	rpl_map = svc_rdma_get_req_map();
+	chl_map = svc_rdma_get_req_map();
 
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-				    sge, ch_sge_ary,
+				    rpl_map, chl_map,
 				    ch_count, byte_count);
 	sgl_offset = 0;
 	ch_no = 0;
@@ -331,14 +323,15 @@ next_sge:
 		read_wr.wr.rdma.remote_addr =
 			get_unaligned(&(ch->rc_target.rs_offset)) +
 			sgl_offset;
-		read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
+		read_wr.sg_list = ctxt->sge;
 		read_wr.num_sge =
-			rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
-		rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
+			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
+		rdma_set_ctxt_sge(xprt, ctxt,
+				  &rpl_map->sge[chl_map->ch[ch_no].start],
 				  &sgl_offset,
 				  read_wr.num_sge);
 		if (((ch+1)->rc_discrim == 0) &&
-		    (read_wr.num_sge == ch_sge_ary[ch_no].count)) {
+		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
 			/*
 			 * Mark the last RDMA_READ with a bit to
 			 * indicate all RPC data has been fetched from
@@ -358,9 +351,9 @@ next_sge:
 		}
 		atomic_inc(&rdma_stat_read);
 
-		if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
-			ch_sge_ary[ch_no].count -= read_wr.num_sge;
-			ch_sge_ary[ch_no].start += read_wr.num_sge;
+		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
+			chl_map->ch[ch_no].count -= read_wr.num_sge;
+			chl_map->ch[ch_no].start += read_wr.num_sge;
 			goto next_sge;
 		}
 		sgl_offset = 0;
@@ -368,8 +361,8 @@ next_sge:
 	}
 
  out:
-	svc_rdma_put_context(tmp_sge_ctxt, 0);
-	svc_rdma_put_context(tmp_ch_ctxt, 0);
+	svc_rdma_put_req_map(rpl_map);
+	svc_rdma_put_req_map(chl_map);
 
 	/* Detach arg pages. svc_recv will replenish them */
 	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
@@ -399,7 +392,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
 	/* Point rq_arg.pages past header */
-	rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
+	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
 	rqstp->rq_arg.page_base = head->arg.page_base;
 
-- 
cgit v1.2.3


From 87295b6c5c7fd7bbc0ce3e7f42d2adbbac7352b9 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 13:17:44 -0500
Subject: svcrdma: Add dma map count and WARN_ON

Add a dma map count in order to verify that all DMA mapping resources
have been freed when the transport is closed.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 +
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 1 +
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 3 +++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 +++++
 4 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index fd5e8a1c17de..ab93afc03c43 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -130,6 +130,7 @@ struct svcxprt_rdma {
 
 	struct ib_pd         *sc_pd;
 
+	atomic_t	     sc_dma_used;
 	atomic_t	     sc_ctxt_used;
 	struct list_head     sc_ctxt_free;
 	int		     sc_ctxt_cnt;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index d25971b42a74..b4b17f44cb29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -222,6 +222,7 @@ static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
 	ctxt->count = count;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (i = 0; i < count; i++) {
+		atomic_inc(&xprt->sc_dma_used);
 		ctxt->sge[i].addr =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  vec[i].iov_base, vec[i].iov_len,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index bdc11a30e937..a19b22b452a3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -163,6 +163,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 		sge_bytes = min((size_t)bc,
 				(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
 		sge[sge_no].length = sge_bytes;
+		atomic_inc(&xprt->sc_dma_used);
 		sge[sge_no].addr =
 			ib_dma_map_single(xprt->sc_cm_id->device,
 					  (void *)
@@ -385,6 +386,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	ctxt->count = 1;
 
 	/* Prepare the SGE for the RPCRDMA Header */
+	atomic_inc(&rdma->sc_dma_used);
 	ctxt->sge[0].addr =
 		ib_dma_map_page(rdma->sc_cm_id->device,
 				page, 0, PAGE_SIZE, DMA_TO_DEVICE);
@@ -396,6 +398,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
+		atomic_inc(&rdma->sc_dma_used);
 		ctxt->sge[sge_no].addr =
 			ib_dma_map_single(rdma->sc_cm_id->device,
 					  vec->sge[sge_no].iov_base,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 7e8ee66458ea..6fddd588c031 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -155,6 +155,7 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	int i;
 	for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+		atomic_dec(&xprt->sc_dma_used);
 		ib_dma_unmap_single(xprt->sc_cm_id->device,
 				    ctxt->sge[i].addr,
 				    ctxt->sge[i].length,
@@ -519,6 +520,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	cma_xprt->sc_max_requests = svcrdma_max_requests;
 	cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
 	atomic_set(&cma_xprt->sc_sq_count, 0);
+	atomic_set(&cma_xprt->sc_ctxt_used, 0);
 
 	if (!listener) {
 		int reqs = cma_xprt->sc_max_requests;
@@ -569,6 +571,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 		BUG_ON(sge_no >= xprt->sc_max_sge);
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
+		atomic_inc(&xprt->sc_dma_used);
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
 				     page, 0, PAGE_SIZE,
 				     DMA_FROM_DEVICE);
@@ -1049,6 +1052,7 @@ static void __svc_rdma_free(struct work_struct *work)
 
 	/* Warn if we leaked a resource or under-referenced */
 	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
 
 	/* Destroy the QP if present (not a listener) */
 	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1169,6 +1173,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
 
 	/* Prepare SGE for local address */
+	atomic_inc(&xprt->sc_dma_used);
 	sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
 				   p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
 	sge.lkey = xprt->sc_phys_mr->lkey;
-- 
cgit v1.2.3


From 779a48577ba88b6a7e9748a04b0b739f36c5e6f6 Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Mon, 19 May 2008 10:17:09 -0500
Subject: svcrdma: Remove unused wait q from svcrdma_xprt structure

The sc_read_wait queue head is no longer used. Remove it.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index ab93afc03c43..d8d74c4ab504 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -119,7 +119,6 @@ struct svcxprt_rdma {
 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
 	int		     sc_ord;		/* RDMA read limit */
-	wait_queue_head_t    sc_read_wait;
 	int                  sc_max_sge;
 
 	int                  sc_sq_depth;	/* Depth of SQ */
-- 
cgit v1.2.3


From 8948896c9e098c6fd31a6a698a598a7cbd7fa40e Mon Sep 17 00:00:00 2001
From: Tom Tucker <tom@opengridcomputing.com>
Date: Wed, 28 May 2008 15:14:02 -0500
Subject: svcrdma: Change WR context get/put to use the kmem cache

Change the WR context pool to be shared across mount points. This
reduces the RDMA transport memory footprint significantly since
idle mounts don't consume WR context memory.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   6 --
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 121 +++----------------------------
 2 files changed, 12 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d8d74c4ab504..ef2e3a20bf3b 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -73,7 +73,6 @@ extern atomic_t rdma_stat_sq_prod;
 struct svc_rdma_op_ctxt {
 	struct svc_rdma_op_ctxt *read_hdr;
 	int hdr_count;
-	struct list_head free_list;
 	struct xdr_buf arg;
 	struct list_head dto_q;
 	enum ib_wr_opcode wr_op;
@@ -131,11 +130,6 @@ struct svcxprt_rdma {
 
 	atomic_t	     sc_dma_used;
 	atomic_t	     sc_ctxt_used;
-	struct list_head     sc_ctxt_free;
-	int		     sc_ctxt_cnt;
-	int		     sc_ctxt_bump;
-	int		     sc_ctxt_max;
-	spinlock_t	     sc_ctxt_lock;
 	struct list_head     sc_rq_dto_q;
 	spinlock_t	     sc_rq_dto_lock;
 	struct ib_qp         *sc_qp;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 80104f4999d5..19ddc382b777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,69 +84,23 @@ struct svc_xprt_class svc_rdma_class = {
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
 
-static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
-{
-	int target;
-	int at_least_one = 0;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump,
-		     xprt->sc_ctxt_max);
-
-	spin_lock_bh(&xprt->sc_ctxt_lock);
-	while (xprt->sc_ctxt_cnt < target) {
-		xprt->sc_ctxt_cnt++;
-		spin_unlock_bh(&xprt->sc_ctxt_lock);
-
-		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-
-		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (ctxt) {
-			at_least_one = 1;
-			INIT_LIST_HEAD(&ctxt->free_list);
-			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-		} else {
-			/* kmalloc failed...give up for now */
-			xprt->sc_ctxt_cnt--;
-			break;
-		}
-	}
-	spin_unlock_bh(&xprt->sc_ctxt_lock);
-	dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n",
-		xprt->sc_ctxt_max, xprt->sc_ctxt_cnt);
-	return at_least_one;
-}
+/* WR context cache. Created in svc_rdma.c  */
+extern struct kmem_cache *svc_rdma_ctxt_cachep;
 
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt;
 
 	while (1) {
-		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
-			/* Try to bump my cache. */
-			spin_unlock_bh(&xprt->sc_ctxt_lock);
-
-			if (rdma_bump_context_cache(xprt))
-				continue;
-
-			printk(KERN_INFO "svcrdma: sleeping waiting for "
-			       "context memory on xprt=%p\n",
-			       xprt);
-			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-			continue;
-		}
-		ctxt = list_entry(xprt->sc_ctxt_free.next,
-				  struct svc_rdma_op_ctxt,
-				  free_list);
-		list_del_init(&ctxt->free_list);
-		spin_unlock_bh(&xprt->sc_ctxt_lock);
-		ctxt->xprt = xprt;
-		INIT_LIST_HEAD(&ctxt->dto_q);
-		ctxt->count = 0;
-		atomic_inc(&xprt->sc_ctxt_used);
-		break;
+		ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
+		if (ctxt)
+			break;
+		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
+	ctxt->xprt = xprt;
+	INIT_LIST_HEAD(&ctxt->dto_q);
+	ctxt->count = 0;
+	atomic_inc(&xprt->sc_ctxt_used);
 	return ctxt;
 }
 
@@ -174,9 +128,7 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 		for (i = 0; i < ctxt->count; i++)
 			put_page(ctxt->pages[i]);
 
-	spin_lock_bh(&xprt->sc_ctxt_lock);
-	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-	spin_unlock_bh(&xprt->sc_ctxt_lock);
+	kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
 	atomic_dec(&xprt->sc_ctxt_used);
 }
 
@@ -461,40 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
 	tasklet_schedule(&dto_tasklet);
 }
 
-static void create_context_cache(struct svcxprt_rdma *xprt,
-				 int ctxt_count, int ctxt_bump, int ctxt_max)
-{
-	struct svc_rdma_op_ctxt *ctxt;
-	int i;
-
-	xprt->sc_ctxt_max = ctxt_max;
-	xprt->sc_ctxt_bump = ctxt_bump;
-	xprt->sc_ctxt_cnt = 0;
-	atomic_set(&xprt->sc_ctxt_used, 0);
-
-	INIT_LIST_HEAD(&xprt->sc_ctxt_free);
-	for (i = 0; i < ctxt_count; i++) {
-		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
-		if (ctxt) {
-			INIT_LIST_HEAD(&ctxt->free_list);
-			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
-			xprt->sc_ctxt_cnt++;
-		}
-	}
-}
-
-static void destroy_context_cache(struct svcxprt_rdma *xprt)
-{
-	while (!list_empty(&xprt->sc_ctxt_free)) {
-		struct svc_rdma_op_ctxt *ctxt;
-		ctxt = list_entry(xprt->sc_ctxt_free.next,
-				  struct svc_rdma_op_ctxt,
-				  free_list);
-		list_del_init(&ctxt->free_list);
-		kfree(ctxt);
-	}
-}
-
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 					     int listener)
 {
@@ -511,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_read_complete_lock);
-	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 
 	cma_xprt->sc_ord = svcrdma_ord;
@@ -522,20 +439,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	atomic_set(&cma_xprt->sc_sq_count, 0);
 	atomic_set(&cma_xprt->sc_ctxt_used, 0);
 
-	if (!listener) {
-		int reqs = cma_xprt->sc_max_requests;
-		create_context_cache(cma_xprt,
-				     reqs << 1, /* starting size */
-				     reqs,	/* bump amount */
-				     reqs +
-				     cma_xprt->sc_sq_depth +
-				     RPCRDMA_MAX_THREADS + 1); /* max */
-		if (list_empty(&cma_xprt->sc_ctxt_free)) {
-			kfree(cma_xprt);
-			return NULL;
-		}
-		clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
-	} else
+	if (listener)
 		set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
 
 	return cma_xprt;
@@ -1077,7 +981,6 @@ static void __svc_rdma_free(struct work_struct *work)
 	/* Destroy the CM ID */
 	rdma_destroy_id(rdma->sc_cm_id);
 
-	destroy_context_cache(rdma);
 	kfree(rdma);
 }
 
-- 
cgit v1.2.3