From 5dc06c5a70b79a323152bec7e55783e705767e63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:49:55 +0200 Subject: block: remove READ_META and WRITE_META Replace all occurnanced of the undocumented READ_META with READ | REQ_META and remove the unused WRITE_META define. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 178cdb4f1d4a..eae44c981173 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -162,10 +162,8 @@ struct inodes_stat_t { #define READA RWA_MASK #define READ_SYNC (READ | REQ_SYNC) -#define READ_META (READ | REQ_META) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_META (WRITE | REQ_META) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) -- cgit v1.2.1 From 65299a3b788bd274bed92f9fa3232082c9f3ea70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:50:29 +0200 Subject: block: separate priority boosting from REQ_META Add a new REQ_PRIO to let requests preempt others in the cfq I/O schedule, and lave REQ_META purely for marking requests as metadata in blktrace. All existing callers of REQ_META except for XFS are updated to also set REQ_PRIO for now. Signed-off-by: Christoph Hellwig Reviewed-by: Namhyung Kim Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32f0076e844b..71fc53bb8f1c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -124,6 +124,7 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ @@ -161,14 +162,15 @@ enum rq_flag_bits { #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1 << __REQ_SYNC) #define REQ_META (1 << __REQ_META) +#define REQ_PRIO (1 << __REQ_PRIO) #define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.1 From 56ebdaf2fa3c5276be201c5d1aff1490b682ecf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:34 +0200 Subject: block: simplify force plug flush code a little bit Cleaning up the code a little bit. attempt_plug_merge() traverses the plug list anyway, we can do the request counting there, so stack size is reduced a little bit. The motivation here is I suspect if we should count the requests for each queue (task could handle multiple disks in the meantime), but my test doesn't show it's worthy doing. If somebody proves we should do it, below change will make that more easier. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b15d54f8c2..7fbaa9103344 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -873,7 +873,6 @@ struct blk_plug { struct list_head list; struct list_head cb_list; unsigned int should_sort; - unsigned int count; }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.1 From ec0506dbe4e240ecd4c32bf74c84a88ce1ddb414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 28 Aug 2011 12:35:31 +0000 Subject: net: relax PKTINFO non local ipv6 udp xmit check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow transparent sockets to be less restrictive about the source ip of ipv6 udp packets being sent. Google-Bug-Id: 5018138 Signed-off-by: Maciej Żenczykowski CC: "Erik Kline" CC: "Lorenzo Colitti" Signed-off-by: David S. Miller --- include/net/transp_v6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 5271a741c3a3..498433dd067d 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -39,6 +39,7 @@ extern int datagram_recv_ctl(struct sock *sk, struct sk_buff *skb); extern int datagram_send_ctl(struct net *net, + struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, -- cgit v1.2.1 From c8ad620638f97bdb7c8ef8cc788f08a04b14eadc Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 29 Aug 2011 09:52:23 -0600 Subject: writeback: show raw dirtied_when in trace writeback_single_inode Save inode->dirtied_when in the raw trace output for reliable scripting, and to also show in formatted output the relative age in seconds for easy human reading. CC: Jan Kara Acked-by: Christoph Hellwig Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6bca4cc0063c..5f172703eb4f 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __array(char, name, 32) __field(unsigned long, ino) __field(unsigned long, state) - __field(unsigned long, age) + __field(unsigned long, dirtied_when) __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) @@ -309,19 +309,19 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, dev_name(inode->i_mapping->backing_dev_info->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; - __entry->age = (jiffies - inode->dirtied_when) * - 1000 / HZ; + __entry->dirtied_when = inode->dirtied_when; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; ), - TP_printk("bdi %s: ino=%lu state=%s age=%lu " + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " "index=%lu to_write=%ld wrote=%lu", __entry->name, __entry->ino, show_inode_state(__entry->state), - __entry->age, + __entry->dirtied_when, + (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, __entry->wrote -- cgit v1.2.1 From 8efcc57dedfebc99c3cd39564e3fc47cd1a24b75 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Aug 2011 18:04:29 +0900 Subject: mfd: Fix value of WM8994_CONFIGURE_GPIO This needs to be an out of band value for the register and on this device registers are 16 bit so we must shift left one to the 17th bit. Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8994/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index d12f8d635a81..97cf4f27d647 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -26,7 +26,7 @@ struct wm8994_ldo_pdata { struct regulator_init_data *init_data; }; -#define WM8994_CONFIGURE_GPIO 0x8000 +#define WM8994_CONFIGURE_GPIO 0x10000 #define WM8994_DRC_REGS 5 #define WM8994_EQ_REGS 20 -- cgit v1.2.1 From 185efc0f9a1f2d6ad6d4782c5d9e529f3290567f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 14 Sep 2011 16:21:58 -0700 Subject: memcg: Revert "memcg: add memory.vmscan_stat" Revert the post-3.0 commit 82f9d486e59f5 ("memcg: add memory.vmscan_stat"). The implementation of per-memcg reclaim statistics violates how memcg hierarchies usually behave: hierarchically. The reclaim statistics are accounted to child memcgs and the parent hitting the limit, but not to hierarchy levels in between. Usually, hierarchical statistics are perfectly recursive, with each level representing the sum of itself and all its children. Since this exports statistics to userspace, this may lead to confusion and problems with changing things after the release, so revert it now, we can try again later. Signed-off-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Michal Hocko Cc: Ying Han Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 ------------------- include/linux/swap.h | 6 ++++++ 2 files changed, 6 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3b535db00a94..343bd7661f2a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active, int file); -struct memcg_scanrecord { - struct mem_cgroup *mem; /* scanend memory cgroup */ - struct mem_cgroup *root; /* scan target hierarchy root */ - int context; /* scanning context (see memcontrol.c) */ - unsigned long nr_scanned[2]; /* the number of scanned pages */ - unsigned long nr_rotated[2]; /* the number of rotated pages */ - unsigned long nr_freed[2]; /* the number of freed pages */ - unsigned long elapsed; /* nsec of time elapsed while scanning */ -}; - #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All "charge" functions with gfp_mask should use GFP_KERNEL or @@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); -extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct memcg_scanrecord *rec); -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct zone *zone, - struct memcg_scanrecord *rec, - unsigned long *nr_scanned); - #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 14d62490922e..c71f84bb62ec 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page) extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, int mode, int file); +extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + struct zone *zone, + unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); -- cgit v1.2.1 From 4f5b04800a224aadb6cffcbbc3d3fa26e2367c7f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 14 Sep 2011 16:22:29 -0700 Subject: drivers/gpio/gpio-generic.c: fix build errors Building a kernel with hotplug disabled results in a link failure: `bgpio_remove' referenced in section `___ksymtab_gpl+bgpio_remove' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o This is because of bgpio_remove() is exported. It is illegal to export symbols which are discarded either at link time or as part of an init/exit section. Fix this by dropping the __devexit attributation from bgpio_remove(). Also drop the __devinit attributation from bgpio_init(). Signed-off-by: Russell King Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/basic_mmio_gpio.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 98999cf107ce..feb912196745 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -63,15 +63,10 @@ static inline struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) return container_of(gc, struct bgpio_chip, gc); } -int __devexit bgpio_remove(struct bgpio_chip *bgc); -int __devinit bgpio_init(struct bgpio_chip *bgc, - struct device *dev, - unsigned long sz, - void __iomem *dat, - void __iomem *set, - void __iomem *clr, - void __iomem *dirout, - void __iomem *dirin, - bool big_endian); +int bgpio_remove(struct bgpio_chip *bgc); +int bgpio_init(struct bgpio_chip *bgc, struct device *dev, + unsigned long sz, void __iomem *dat, void __iomem *set, + void __iomem *clr, void __iomem *dirout, void __iomem *dirin, + bool big_endian); #endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.1 From 946cedccbd7387488d2cee5da92cdfeb28d2e670 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 03:21:44 +0000 Subject: tcp: Change possible SYN flooding messages "Possible SYN flooding on port xxxx " messages can fill logs on servers. Change logic to log the message only once per listener, and add two new SNMP counters to track : TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client TCPReqQFullDrop : number of times a SYN request was dropped because syncookies were not enabled. Based on a prior patch from Tom Herbert, and suggestions from David. Signed-off-by: Eric Dumazet CC: Tom Herbert Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ include/net/request_sock.h | 3 ++- include/net/tcp.h | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 12b2b18e50c1..e16557a357e5 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -231,6 +231,8 @@ enum LINUX_MIB_TCPDEFERACCEPTDROP, LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */ LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ + LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ + LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ __LINUX_MIB_MAX }; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 99e6e19b57c2..4c0766e201e3 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog; */ struct listen_sock { u8 max_qlen_log; - /* 3 bytes hole, try to use */ + u8 synflood_warned; + /* 2 bytes hole, try to use */ int qlen; int qlen_young; int clock_hand; diff --git a/include/net/tcp.h b/include/net/tcp.h index 149a415d1e0a..e9b48b094683 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); +extern int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); -- cgit v1.2.1 From 48c830120f2a20b44220aa26feda9ed15f49eaab Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 31 Aug 2011 08:03:29 +0000 Subject: net: copy userspace buffers on device forwarding dev_forward_skb loops an skb back into host networking stack which might hang on the memory indefinitely. In particular, this can happen in macvtap in bridged mode. Copy the userspace fragments to avoid blocking the sender in that case. As this patch makes skb_copy_ubufs extern now, I also added some documentation and made it clear the SKBTX_DEV_ZEROCOPY flag automatically instead of doing it in all callers. This can be made into a separate patch if people feel it's worth it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b996ed86d5b..8bd383caa363 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, -- cgit v1.2.1 From d5ccd496601b8776a516d167a6485754575dc38f Mon Sep 17 00:00:00 2001 From: Max Matveev Date: Mon, 29 Aug 2011 21:02:24 +0000 Subject: sctp: deal with multiple COOKIE_ECHO chunks Attempt to reduce the number of IP packets emitted in response to single SCTP packet (2e3216cd) introduced a complication - if a packet contains two COOKIE_ECHO chunks and nothing else then SCTP state machine corks the socket while processing first COOKIE_ECHO and then loses the association and forgets to uncork the socket. To deal with the issue add new SCTP command which can be used to set association explictly. Use this new command when processing second COOKIE_ECHO chunk to restore the context for SCTP state machine. Signed-off-by: Max Matveev Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6506458ccd33..712b3bebeda7 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -109,6 +109,7 @@ typedef enum { SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ + SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST } sctp_verb_t; -- cgit v1.2.1 From 728871bc05afc8ff310b17dba3e57a2472792b13 Mon Sep 17 00:00:00 2001 From: David Ward Date: Mon, 5 Sep 2011 16:47:23 +0000 Subject: net: Align AF-specific flowi structs to long AF-specific flowi structs are now passed to flow_key_compare, which must also be aligned to a long. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/net/flow.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 78113daadd63..2ec377d9ab9f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -68,7 +68,7 @@ struct flowi4 { #define fl4_ipsec_spi uli.spi #define fl4_mh_type uli.mht.type #define fl4_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, @@ -112,7 +112,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { struct flowi_common __fl_common; @@ -127,7 +127,7 @@ struct flowidn { union flowi_uli uli; #define fld_sport uli.ports.sport #define fld_dport uli.ports.dport -}; +} __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowi { union { -- cgit v1.2.1 From aa1c366e4febc7f5c2b84958a2dd7cd70e28f9d0 Mon Sep 17 00:00:00 2001 From: dpward Date: Mon, 5 Sep 2011 16:47:24 +0000 Subject: net: Handle different key sizes between address families in flow cache With the conversion of struct flowi to a union of AF-specific structs, some operations on the flow cache need to account for the exact size of the key. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/net/flow.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 2ec377d9ab9f..a09447749e2d 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -7,6 +7,7 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H +#include #include #include @@ -161,6 +162,24 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) return container_of(fldn, struct flowi, u.dn); } +typedef unsigned long flow_compare_t; + +static inline size_t flow_key_size(u16 family) +{ + switch (family) { + case AF_INET: + BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t)); + return sizeof(struct flowi4) / sizeof(flow_compare_t); + case AF_INET6: + BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t)); + return sizeof(struct flowi6) / sizeof(flow_compare_t); + case AF_DECnet: + BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t)); + return sizeof(struct flowidn) / sizeof(flow_compare_t); + } + return 0; +} + #define FLOW_DIR_IN 0 #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 -- cgit v1.2.1 From e05c82d3666119075615fdbf6abca0266344f27b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 18 Sep 2011 21:02:55 -0400 Subject: tcp: fix build error if !CONFIG_SYN_COOKIES commit 946cedccbd7387 (tcp: Change possible SYN flooding messages) added a build error if CONFIG_SYN_COOKIES=n Reported-by: Markus Trippelsdorf Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e9b48b094683..acc620a4a45f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -431,17 +431,34 @@ extern int tcp_disconnect(struct sock *sk, int flags); extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); +#ifdef CONFIG_SYN_COOKIES extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); +#else +static inline __u32 cookie_v4_init_sequence(struct sock *sk, + struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif extern __u32 cookie_init_timestamp(struct request_sock *req); extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *); /* From net/ipv6/syncookies.c */ extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SYN_COOKIES extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mss); - +#else +static inline __u32 cookie_v6_init_sequence(struct sock *sk, + struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif /* tcp_output.c */ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, -- cgit v1.2.1 From 5bd078dda4d4fbdb4bd138a6bd5b6e274c019ed2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Sep 2011 11:31:36 -0500 Subject: irq: Add declaration of irq_domain_simple_ops to irqdomain.h irq_domain_simple_ops is exported, but is not declared in irqdomain.h, so add it. Signed-off-by: Rob Herring Cc: Grant Likely Cc: marc.zyngier@arm.com Cc: thomas.abraham@linaro.org Cc: jamie@jamieiles.com Cc: b-cousson@ti.com Cc: shawn.guo@linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: devicetree-discuss@lists.ozlabs.org Link: http://lkml.kernel.org/r/1316017900-19918-2-git-send-email-robherring2@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e807ad687a07..3ad553e8eae2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -80,6 +80,7 @@ extern void irq_domain_del(struct irq_domain *domain); #endif /* CONFIG_IRQ_DOMAIN */ #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ) +extern struct irq_domain_ops irq_domain_simple_ops; extern void irq_domain_add_simple(struct device_node *controller, int irq_base); extern void irq_domain_generate_simple(const struct of_device_id *match, u64 phys_base, unsigned int irq_start); -- cgit v1.2.1 From b6cf8788a3382c2000743a0e393bcc8aeb0601cb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 20 Sep 2011 17:07:29 +0200 Subject: [S390] kvm: extension capability for new address space layout 598841ca9919d008b520114d8a4378c4ce4e40a1 ([S390] use gmap address spaces for kvm guest images) changed kvm on s390 to use a separate address space for kvm guests. We can now put KVM guests anywhere in the user address mode with a size up to 8PB - as long as the memory is 1MB-aligned. This change was done without KVM extension capability bit. The change was added after 3.0, but we still have a chance to add a feature bit before 3.1 (keeping the releases in a sane state). We use number 71 to avoid collisions with other pending kvm patches as requested by Alexander Graf. Signed-off-by: Christian Borntraeger Acked-by: Avi Kivity Cc: Alexander Graf Signed-off-by: Heiko Carstens --- include/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2c366b52f505..aace6b8691a2 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -553,6 +553,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SPAPR_TCE 63 #define KVM_CAP_PPC_SMT 64 #define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_S390_GMAP 71 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.1 From d94c177beeb4469cd4f6e83354ab0223353e98ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Sep 2011 17:44:55 -0700 Subject: vfs pathname lookup: Add LOOKUP_AUTOMOUNT flag Since we've now turned around and made LOOKUP_FOLLOW *not* force an automount, we want to add the ability to force an automount event on lookup even if we don't happen to have one of the other flags that force it implicitly (LOOKUP_OPEN, LOOKUP_DIRECTORY, LOOKUP_PARENT..) Most cases will never want to use this, since you'd normally want to delay automounting as long as possible, which usually implies LOOKUP_OPEN (when we open a file or directory, we really cannot avoid the automount any more). But Trond argued sufficiently forcefully that at a minimum bind mounting a file and quotactl will want to force the automount lookup. Some other cases (like nfs_follow_remote_path()) could use it too, although LOOKUP_DIRECTORY would work there as well. This commit just adds the flag and logic, no users yet, though. It also doesn't actually touch the LOOKUP_NO_AUTOMOUNT flag that is related, and was made irrelevant by the same change that made us not follow on LOOKUP_FOLLOW. Cc: Trond Myklebust Cc: Ian Kent Cc: Jeff Layton Cc: Miklos Szeredi Cc: David Howells Cc: Al Viro Cc: Greg KH Signed-off-by: Linus Torvalds --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 76fe2c62ae71..e13dac7caab2 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -48,6 +48,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 +#define LOOKUP_AUTOMOUNT 0x0004 #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 -- cgit v1.2.1 From b6c8069d3577481390b3f24a8434ad72a3235594 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 Sep 2011 08:12:33 -0700 Subject: vfs: remove LOOKUP_NO_AUTOMOUNT flag That flag no longer makes sense, since we don't look up automount points as eagerly any more. Additionally, it turns out that the NO_AUTOMOUNT handling was buggy to begin with: it would avoid automounting even for cases where we really *needed* to do the automount handling, and could return ENOENT for autofs entries that hadn't been instantiated yet. With our new non-eager automount semantics, one discussion has been about adding a AT_AUTOMOUNT flag to vfs_fstatat (and thus the newfstatat() and fstatat64() system calls), but it's probably not worth it: you can always force at least directory automounting by simply adding the final '/' to the filename, which works for *all* of the stat family system calls, old and new. So AT_NO_AUTOMOUNT (and thus LOOKUP_NO_AUTOMOUNT) really were just a result of our bad default behavior. Acked-by: Ian Kent Acked-by: Trond Myklebust Signed-off-by: Linus Torvalds --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index e13dac7caab2..409328d1cbbb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -53,7 +53,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 -#define LOOKUP_NO_AUTOMOUNT 0x0080 + /* * Intent data */ -- cgit v1.2.1 From d670ec13178d0fd8680e6742a2bc6e04f28f87d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Sep 2011 12:42:04 +0200 Subject: posix-cpu-timers: Cure SMP wobbles David reported: Attached below is a watered-down version of rt/tst-cpuclock2.c from GLIBC. Just build it with "gcc -o test test.c -lpthread -lrt" or similar. Run it several times, and you will see cases where the main thread will measure a process clock difference before and after the nanosleep which is smaller than the cpu-burner thread's individual thread clock difference. This doesn't make any sense since the cpu-burner thread is part of the top-level process's thread group. I've reproduced this on both x86-64 and sparc64 (using both 32-bit and 64-bit binaries). For example: [davem@boricha build-x86_64-linux]$ ./test process: before(0.001221967) after(0.498624371) diff(497402404) thread: before(0.000081692) after(0.498316431) diff(498234739) self: before(0.001223521) after(0.001240219) diff(16698) [davem@boricha build-x86_64-linux]$ The diff of 'process' should always be >= the diff of 'thread'. I make sure to wrap the 'thread' clock measurements the most tightly around the nanosleep() call, and that the 'process' clock measurements are the outer-most ones. --- #include #include #include #include #include #include #include #include static pthread_barrier_t barrier; static void *chew_cpu(void *arg) { pthread_barrier_wait(&barrier); while (1) __asm__ __volatile__("" : : : "memory"); return NULL; } int main(void) { clockid_t process_clock, my_thread_clock, th_clock; struct timespec process_before, process_after; struct timespec me_before, me_after; struct timespec th_before, th_after; struct timespec sleeptime; unsigned long diff; pthread_t th; int err; err = clock_getcpuclockid(0, &process_clock); if (err) return 1; err = pthread_getcpuclockid(pthread_self(), &my_thread_clock); if (err) return 1; pthread_barrier_init(&barrier, NULL, 2); err = pthread_create(&th, NULL, chew_cpu, NULL); if (err) return 1; err = pthread_getcpuclockid(th, &th_clock); if (err) return 1; pthread_barrier_wait(&barrier); err = clock_gettime(process_clock, &process_before); if (err) return 1; err = clock_gettime(my_thread_clock, &me_before); if (err) return 1; err = clock_gettime(th_clock, &th_before); if (err) return 1; sleeptime.tv_sec = 0; sleeptime.tv_nsec = 500000000; nanosleep(&sleeptime, NULL); err = clock_gettime(th_clock, &th_after); if (err) return 1; err = clock_gettime(my_thread_clock, &me_after); if (err) return 1; err = clock_gettime(process_clock, &process_after); if (err) return 1; diff = process_after.tv_nsec - process_before.tv_nsec; printf("process: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", process_before.tv_sec, process_before.tv_nsec, process_after.tv_sec, process_after.tv_nsec, diff); diff = th_after.tv_nsec - th_before.tv_nsec; printf("thread: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", th_before.tv_sec, th_before.tv_nsec, th_after.tv_sec, th_after.tv_nsec, diff); diff = me_after.tv_nsec - me_before.tv_nsec; printf("self: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", me_before.tv_sec, me_before.tv_nsec, me_after.tv_sec, me_after.tv_nsec, diff); return 0; } This is due to us using p->se.sum_exec_runtime in thread_group_cputime() where we iterate the thread group and sum all data. This does not take time since the last schedule operation (tick or otherwise) into account. We can cure this by using task_sched_runtime() at the cost of having to take locks. This also means we can (and must) do away with thread_group_sched_runtime() since the modified thread_group_cputime() is now more accurate and would deadlock when called from thread_group_sched_runtime(). Aside of that it makes the function safe on 32 bit systems. The old code added t->se.sum_exec_runtime unprotected. sum_exec_runtime is a 64bit value and could be changed on another cpu at the same time. Reported-by: David Miller Signed-off-by: Peter Zijlstra Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1314874459.7945.22.camel@twins Tested-by: David Miller Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0f..41d0237fd449 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1956,7 +1956,6 @@ static inline void disable_sched_clock_irqtime(void) {} extern unsigned long long task_sched_runtime(struct task_struct *task); -extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP -- cgit v1.2.1