From 17b14451fd2b187ddd6303726755a3af0a926b6c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Sep 2005 15:44:00 +0100 Subject: [PATCH] PATCH: remove function for non-PCI as requested Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 022105c745fc..ceee1fc42c60 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -393,6 +393,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i extern void ata_pci_remove_one (struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(struct ata_probe_ent *ent); +extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(Scsi_Host_Template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); -- cgit v1.2.1 From e86ee6682b649183c11013a98be02f25e9ae399d Mon Sep 17 00:00:00 2001 From: Andy Currid Date: Mon, 19 Sep 2005 06:17:52 -0700 Subject: [PATCH] Add NVIDIA device ID in sata_nv Signed-off-by: Andy Currid Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6c1a142286a..cb414ea42f02 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1267,7 +1267,8 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E -#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define 
PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.1 From 590232a7150674b2036291eaefce085f3f9659c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 22 Sep 2005 04:30:44 -0300 Subject: [LLC]: Add sysctl support for the LLC timeouts Signed-off-by: Jochen Friedrich Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/sysctl.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3a29a9f9b451..fc8e367f671e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -202,7 +202,8 @@ enum NET_TR=14, NET_DECNET=15, NET_ECONET=16, - NET_SCTP=17, + NET_SCTP=17, + NET_LLC=18, }; /* /proc/sys/kernel/random */ @@ -522,6 +523,29 @@ enum { NET_IPX_FORWARDING=2 }; +/* /proc/sys/net/llc */ +enum { + NET_LLC2=1, + NET_LLC_STATION=2, +}; + +/* /proc/sys/net/llc/llc2 */ +enum { + NET_LLC2_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/station */ +enum { + NET_LLC_STATION_ACK_TIMEOUT=1, +}; + +/* /proc/sys/net/llc/llc2/timeout */ +enum { + NET_LLC2_ACK_TIMEOUT=1, + NET_LLC2_P_TIMEOUT=2, + NET_LLC2_REJ_TIMEOUT=3, + NET_LLC2_BUSY_TIMEOUT=4, +}; /* /proc/sys/net/appletalk */ enum { -- cgit v1.2.1 From 8c3520d4eb3b1bbf2e45fbae8dcfb8db06d5e775 Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Sun, 21 Aug 2005 22:29:26 -0700 Subject: [PATCH] yenta: auto-tune EnE bridges for CardBus cards Echo Audio cardbus products are known to be incompatible with EnE bridges. in order to maybe solve the problem a EnE specific test bit has to be set, another cleared...but other setups have a good chance to break when just forcing the bits. so do the whole thingy automatically. The patch adds a hook in cb_alloc() that allows special tuning for the different chipsets. for ene just match the Echo products and set/clear the test bits, defaults to do the same thing as w/o the patch to not break working setups. 
Signed-off-by: Daniel Ritz Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b86a4b77007e..92efb2c767f9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2187,7 +2187,12 @@ #define PCI_DEVICE_ID_ENE_1211 0x1211 #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 +#define PCI_DEVICE_ID_ENE_710 0x1411 +#define PCI_DEVICE_ID_ENE_712 0x1412 #define PCI_DEVICE_ID_ENE_1420 0x1420 +#define PCI_DEVICE_ID_ENE_720 0x1421 +#define PCI_DEVICE_ID_ENE_722 0x1422 + #define PCI_VENDOR_ID_CHELSIO 0x1425 #define PCI_VENDOR_ID_MIPS 0x153f -- cgit v1.2.1 From 6c1a10dba92cbacb58563f5eacf93807125b488a Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Tue, 20 Sep 2005 14:12:17 -0700 Subject: [PATCH] yenta: add support for more TI bridges Support some more TI cardbus bridges. most of them are multifunction devices which adds 1394 controllers, smartcard readers etc. this could also help with the various problems with the XX21 controllers seen on the linux-pcmcia list. 
Signed-off-by: Daniel Ritz Signed-off-by: Dominik Brodowski --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 92efb2c767f9..68f11ac1a314 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -769,6 +769,8 @@ #define PCI_DEVICE_ID_TI_TVP4010 0x3d04 #define PCI_DEVICE_ID_TI_TVP4020 0x3d07 #define PCI_DEVICE_ID_TI_4450 0x8011 +#define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 +#define PCI_DEVICE_ID_TI_X515 0x8036 #define PCI_DEVICE_ID_TI_1130 0xac12 #define PCI_DEVICE_ID_TI_1031 0xac13 #define PCI_DEVICE_ID_TI_1131 0xac15 @@ -785,12 +787,17 @@ #define PCI_DEVICE_ID_TI_4451 0xac42 #define PCI_DEVICE_ID_TI_4510 0xac44 #define PCI_DEVICE_ID_TI_4520 0xac46 +#define PCI_DEVICE_ID_TI_7510 0xac47 +#define PCI_DEVICE_ID_TI_7610 0xac48 +#define PCI_DEVICE_ID_TI_7410 0xac49 #define PCI_DEVICE_ID_TI_1410 0xac50 #define PCI_DEVICE_ID_TI_1420 0xac51 #define PCI_DEVICE_ID_TI_1451A 0xac52 #define PCI_DEVICE_ID_TI_1620 0xac54 #define PCI_DEVICE_ID_TI_1520 0xac55 #define PCI_DEVICE_ID_TI_1510 0xac56 +#define PCI_DEVICE_ID_TI_X620 0xac8d +#define PCI_DEVICE_ID_TI_X420 0xac8e #define PCI_VENDOR_ID_SONY 0x104d #define PCI_DEVICE_ID_SONY_CXD3222 0x8039 -- cgit v1.2.1 From 4fb7edce52e5b6cf41e3375822d74a27f0b6f2dd Mon Sep 17 00:00:00 2001 From: Kars de Jong Date: Sun, 25 Sep 2005 14:39:46 +0200 Subject: [PATCH] pcmcia: fix cross-platform issues with pcmcia module aliases - Added a missing TO_NATIVE call to scripts/mod/file2alias.c:do_pcmcia_entry() - Add an alignment attribute to struct pcmcia_device_id to solve an alignment issue seen when cross-compiling on x86 for m68k. 
Signed-off-by: Kars de Jong Signed-off-by: Dominik Brodowski --- include/linux/mod_devicetable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 47da39ba3f03..4ed2107bc020 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -209,10 +209,11 @@ struct pcmcia_device_id { /* for real multi-function devices */ __u8 function; - /* for pseude multi-function devices */ + /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4]; + __u32 prod_id_hash[4] + __attribute__((aligned(sizeof(__u32)))); /* not matched against in kernelspace*/ #ifdef __KERNEL__ -- cgit v1.2.1 From acd042bb2de50d4e6fb969281a00cc8b8b71e46d Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 26 Sep 2005 15:06:50 -0700 Subject: [CONNECTOR]: async connector mode. If input message rate from userspace is too high, do not drop them, but try to deliver using work queue allocation. Failing there is some kind of congestion control. It also removes warn_on on this condition, which scares people. Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. 
Miller --- include/linux/connector.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 96de26301f84..86d4b0a81713 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -104,12 +104,19 @@ struct cn_queue_dev { struct sock *nls; }; -struct cn_callback { +struct cn_callback_id { unsigned char name[CN_CBQ_NAMELEN]; - struct cb_id id; +}; + +struct cn_callback_data { + void (*destruct_data) (void *); + void *ddata; + + void *callback_priv; void (*callback) (void *); - void *priv; + + void *free; }; struct cn_callback_entry { @@ -118,8 +125,8 @@ struct cn_callback_entry { struct work_struct work; struct cn_queue_dev *pdev; - void (*destruct_data) (void *); - void *ddata; + struct cn_callback_id id; + struct cn_callback_data data; int seq, group; struct sock *nls; @@ -144,7 +151,7 @@ int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, int); -int cn_queue_add_callback(struct cn_queue_dev *dev, struct cn_callback *cb); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *); @@ -152,6 +159,8 @@ void cn_queue_free_dev(struct cn_queue_dev *dev); int cn_cb_equal(struct cb_id *, struct cb_id *); +void cn_queue_wrapper(void *data); + extern int cn_already_initialized; #endif /* __KERNEL__ */ -- cgit v1.2.1 From 188bab3ae0ed164bc18f98be932512d777dd038b Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 26 Sep 2005 15:25:11 -0700 Subject: [NETFILTER]: Fix invalid module autoloading by splitting iptable_nat When you've enabled conntrack and NAT as a module (standard case in all distributions), and you've also enabled the new conntrack netlink 
interface, loading ip_conntrack_netlink.ko will auto-load iptable_nat.ko. This causes a huge performance penalty, since for every packet you iterate the nat code, even if you don't want it. This patch splits iptable_nat.ko into the NAT core (ip_nat.ko) and the iptables frontend (iptable_nat.ko). Therefore, ip_conntrack_netlink.ko will only pull ip_nat.ko, but not the frontend. ip_nat.ko will "only" allocate some resources, but not affect runtime performance. This separation is also a nice step in anticipation of new packet filters (nf-hipac, ipset, pkttables) being able to use the NAT core. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_core.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 3b50eb91f007..30db23f06b03 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -5,16 +5,14 @@ /* This header used to share core functionality between the standalone NAT module, and the compatibility layer's use of NAT for masquerading. 
*/ -extern int ip_nat_init(void); -extern void ip_nat_cleanup(void); -extern unsigned int nat_packet(struct ip_conntrack *ct, +extern unsigned int ip_nat_packet(struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo, unsigned int hooknum, struct sk_buff **pskb); -extern int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ -- cgit v1.2.1 From c4a3e0a529ab3e65223e81681c7c6b1bc188fa58 Mon Sep 17 00:00:00 2001 From: "Bagalkote, Sreenivas" Date: Tue, 20 Sep 2005 17:46:58 -0400 Subject: [SCSI] MegaRAID SAS RAID: new driver Signed-off-by: Sreenivas Bagalkote Signed-off-by: James Bottomley --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c49d28eca561..20fb79810c3c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -185,6 +185,7 @@ #define PCI_DEVICE_ID_LSI_61C102 0x0901 #define PCI_DEVICE_ID_LSI_63C815 0x1000 #define PCI_DEVICE_ID_LSI_SAS1064 0x0050 +#define PCI_DEVICE_ID_LSI_SAS1064R 0x0411 #define PCI_DEVICE_ID_LSI_SAS1066 0x005E #define PCI_DEVICE_ID_LSI_SAS1068 0x0054 #define PCI_DEVICE_ID_LSI_SAS1064A 0x005C @@ -559,6 +560,7 @@ #define PCI_VENDOR_ID_DELL 0x1028 #define PCI_DEVICE_ID_DELL_RACIII 0x0008 #define PCI_DEVICE_ID_DELL_RAC4 0x0012 +#define PCI_DEVICE_ID_DELL_PERC5 0x0015 #define PCI_VENDOR_ID_MATROX 0x102B #define PCI_DEVICE_ID_MATROX_MGA_2 0x0518 -- cgit v1.2.1 From 9356b8fc07dc126cd91d2b12f314d760ab48996e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2005 15:23:16 -0700 Subject: [NET]: Reorder some hot fields of struct net_device Place them on separate cache lines in SMP to lower memory bouncing between multiple CPU accessing the device. 
- One part is mostly used on receive path (including eth_type_trans()) (poll_list, poll, quota, weight, last_rx, dev_addr, broadcast) - One part is mostly used on queue transmit path (qdisc) (queue_lock, qdisc, qdisc_sleeping, qdisc_list, tx_queue_len) - One part is mostly used on xmit path (device) (xmit_lock, xmit_lock_owner, priv, hard_start_xmit, trans_start) 'features' is placed outside of these hot points, in a location that may be shared by all cpus (because mostly read) name_hlist is moved close to name[IFNAMSIZ] to speedup __dev_get_by_name() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..368e4c825ff1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -265,6 +265,8 @@ struct net_device * the interface. */ char name[IFNAMSIZ]; + /* device name hash chain */ + struct hlist_node name_hlist; /* * I/O specific fields @@ -292,6 +294,21 @@ struct net_device /* ------- Fields preinitialized in Space.c finish here ------- */ + /* Net device features */ + unsigned long features; +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. 
*/ +#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ +#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ +#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ +#define NETIF_F_LLTX 4096 /* LockLess TX */ + struct net_device *next_sched; /* Interface index. Unique device identifier */ @@ -316,9 +333,6 @@ struct net_device * will (read: may be cleaned up at will). */ - /* These may be needed for future network-power-down code. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ - unsigned long last_rx; /* Time of last Rx */ unsigned short flags; /* interface flags (a la BSD) */ unsigned short gflags; @@ -328,15 +342,12 @@ struct net_device unsigned mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ unsigned short hard_header_len; /* hardware hdr length */ - void *priv; /* pointer to private data */ struct net_device *master; /* Pointer to master device of a group, * which this device is member of. */ /* Interface address info. 
*/ - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ - unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ @@ -346,8 +357,6 @@ struct net_device int promiscuity; int allmulti; - int watchdog_timeo; - struct timer_list watchdog_timer; /* Protocol specific pointers */ @@ -358,32 +367,62 @@ struct net_device void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ - struct list_head poll_list; /* Link to poll list */ +/* + * Cache line mostly used on receive path (including eth_type_trans()) + */ + struct list_head poll_list ____cacheline_aligned_in_smp; + /* Link to poll list */ + + int (*poll) (struct net_device *dev, int *quota); int quota; int weight; + unsigned long last_rx; /* Time of last Rx */ + /* Interface address info used in eth_type_trans() */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast + because most packets are unicast) */ + + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ +/* + * Cache line mostly used on queue transmit path (qdisc) + */ + /* device queue lock */ + spinlock_t queue_lock ____cacheline_aligned_in_smp; struct Qdisc *qdisc; struct Qdisc *qdisc_sleeping; - struct Qdisc *qdisc_ingress; struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ /* ingress path synchronizer */ spinlock_t ingress_lock; + struct Qdisc *qdisc_ingress; + +/* + * One part is mostly used on xmit path (device) + */ /* hard_start_xmit synchronizer */ - spinlock_t xmit_lock; + spinlock_t xmit_lock ____cacheline_aligned_in_smp; /* cpu id of processor entered to hard_start_xmit or -1, if nobody entered there. 
*/ int xmit_lock_owner; - /* device queue lock */ - spinlock_t queue_lock; + void *priv; /* pointer to private data */ + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + + int watchdog_timeo; /* used by dev_watchdog() */ + struct timer_list watchdog_timer; + +/* + * refcnt is a very hot point, so align it on SMP + */ /* Number of references to this device */ - atomic_t refcnt; + atomic_t refcnt ____cacheline_aligned_in_smp; + /* delayed register/unregister */ struct list_head todo_list; - /* device name hash chain */ - struct hlist_node name_hlist; /* device index hash chain */ struct hlist_node index_hlist; @@ -396,21 +435,6 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Net device features */ - unsigned long features; -#define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ -#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ -#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ -#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ -#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ -#define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ -#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ -#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ -#define NETIF_F_LLTX 4096 /* LockLess TX */ - /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); /* Called after last user reference disappears. */ @@ -419,10 +443,7 @@ struct net_device /* Pointers to interface service routines. 
*/ int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); #define HAVE_NETDEV_POLL - int (*poll) (struct net_device *dev, int *quota); int (*hard_header) (struct sk_buff *skb, struct net_device *dev, unsigned short type, -- cgit v1.2.1 From 1f26dac32057baaf67d10b45c6b5277db862911d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Sep 2005 15:24:13 -0700 Subject: [NET]: Add Sun Cassini driver. Written by Adrian Sun (asun@darksunrising.com). Ported to 2.6.x by Tom 'spot' Callaway . Further cleaned up and integrated by David S. Miller Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 68f11ac1a314..eb36fd293b41 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -392,6 +392,7 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 #define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502 @@ -983,6 +984,7 @@ #define PCI_DEVICE_ID_SUN_SABRE 0xa000 #define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001 #define PCI_DEVICE_ID_SUN_TOMATILLO 0xa801 +#define PCI_DEVICE_ID_SUN_CASSINI 0xabba #define PCI_VENDOR_ID_CMD 0x1095 #define PCI_DEVICE_ID_CMD_640 0x0640 -- cgit v1.2.1 From 2fab35d78f32fc107e1af4b1ec23f557fa20d911 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 27 Sep 2005 15:59:43 -0700 Subject: [NET]: Fix GCC4 compile error: sysctl in linux/if_ether.h The following is generated when compiling a recent (2.6.14-rc2-git5) kernel configured for ARM, with GCC4. 
CC init/main.o In file included from include/linux/netdevice.h:29, from include/net/sock.h:48, from init/main.c:50: include/linux/if_ether.h:114: error: array type has incomplete element type It seems that if CONFIG_SYSCTL is not set, then the compiler will throw an error due to the definition of the ether_table[] array Attached is a solution to the problem Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fc2d4c8225aa..d21c305c6c64 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -111,7 +111,9 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL extern struct ctl_table ether_table[]; #endif +#endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.1 From 664cceb0093b755739e56572b836a99104ee8a75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Sep 2005 17:03:15 +0100 Subject: [PATCH] Keys: Add possessor permissions to keys [try #3] The attached patch adds extra permission grants to keys for the possessor of a key in addition to the owner, group and other permissions bits. This makes SUID binaries easier to support without going as far as labelling keys and key targets using the LSM facilities. This patch adds a second "pointer type" to key structures (struct key_ref *) that can have the bottom bit of the address set to indicate the possession of a key. This is propagated through searches from the keyring to the discovered key. It has been made a separate type so that the compiler can spot attempts to dereference a potentially incorrect pointer. The "possession" attribute can't be attached to a key structure directly as it's not an intrinsic property of a key. Pointers to keys have been replaced with struct key_ref *'s wherever possession information needs to be passed through. 
This does assume that the bottom bit of the pointer will always be zero on return from kmem_cache_alloc(). The key reference type has been made into a typedef so that at least it can be located in the sources, even though it's basically a pointer to an undefined type. I've also renamed the accessor functions to be more useful, and all reference variables should now end in "_ref". Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- include/linux/key-ui.h | 28 ++++++++++++------ include/linux/key.h | 78 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index cc326174a808..918c34a8347e 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -42,11 +42,14 @@ struct keyring_list { /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const struct key *key, key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -65,11 +68,14 @@ static inline int key_permission(const struct key *key, key_perm_t perm) * check to see whether permission is granted to use a key in at least one of * the desired ways */ -static inline int key_any_permission(const struct key *key, key_perm_t perm) +static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == current->fsuid) + if (is_key_possessed(key_ref)) + kperm = key->perm >> 24; + else if (key->uid == current->fsuid) kperm = key->perm >> 16; else if (key->gid != -1 && key->perm & KEY_GRP_ALL && @@ -94,13 
+100,17 @@ static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) return ret; } -static inline int key_task_permission(const struct key *key, +static inline int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct key *key = key_ref_to_ptr(key_ref); key_perm_t kperm; - if (key->uid == context->fsuid) { + if (is_key_possessed(key_ref)) { + kperm = key->perm >> 24; + } + else if (key->uid == context->fsuid) { kperm = key->perm >> 16; } else if (key->gid != -1 && @@ -121,9 +131,9 @@ static inline int key_task_permission(const struct key *key, } -extern struct key *lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, - key_perm_t perm); +extern key_ref_t lookup_user_key(struct task_struct *context, + key_serial_t id, int create, int partial, + key_perm_t perm); extern long join_session_keyring(const char *name); diff --git a/include/linux/key.h b/include/linux/key.h index 970bbd916cf4..f1efa016dbf3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -35,11 +35,18 @@ struct key; #undef KEY_DEBUGGING -#define KEY_USR_VIEW 0x00010000 /* user can view a key's attributes */ -#define KEY_USR_READ 0x00020000 /* user can read key payload / view keyring */ -#define KEY_USR_WRITE 0x00040000 /* user can update key payload / add link to keyring */ -#define KEY_USR_SEARCH 0x00080000 /* user can find a key in search / search a keyring */ -#define KEY_USR_LINK 0x00100000 /* user can create a link to a key/keyring */ +#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */ +#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */ +#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */ +#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */ +#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */ +#define KEY_POS_ALL 
0x1f000000 + +#define KEY_USR_VIEW 0x00010000 /* user permissions... */ +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 #define KEY_USR_ALL 0x001f0000 #define KEY_GRP_VIEW 0x00000100 /* group permissions... */ @@ -65,6 +72,38 @@ struct key_owner; struct keyring_list; struct keyring_name; +/*****************************************************************************/ +/* + * key reference with possession attribute handling + * + * NOTE! key_ref_t is a typedef'd pointer to a type that is not actually + * defined. This is because we abuse the bottom bit of the reference to carry a + * flag to indicate whether the calling process possesses that key in one of + * its keyrings. + * + * the key_ref_t has been made a separate type so that the compiler can reject + * attempts to dereference it without proper conversion. + * + * the three functions are used to assemble and disassemble references + */ +typedef struct __key_reference_with_attributes *key_ref_t; + +static inline key_ref_t make_key_ref(const struct key *key, + unsigned long possession) +{ + return (key_ref_t) ((unsigned long) key | possession); +} + +static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) +{ + return (struct key *) ((unsigned long) key_ref & ~1UL); +} + +static inline unsigned long is_key_possessed(const key_ref_t key_ref) +{ + return (unsigned long) key_ref & 1UL; +} + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -215,20 +254,25 @@ static inline struct key *key_get(struct key *key) return key; } +static inline void key_ref_put(key_ref_t key_ref) +{ + key_put(key_ref_to_ptr(key_ref)); +} + extern struct key *request_key(struct key_type *type, const char *description, const char *callout_info); extern int key_validate(struct key *key); -extern struct key *key_create_or_update(struct key *keyring, - const 
char *type, - const char *description, - const void *payload, - size_t plen, - int not_in_quota); +extern key_ref_t key_create_or_update(key_ref_t keyring, + const char *type, + const char *description, + const void *payload, + size_t plen, + int not_in_quota); -extern int key_update(struct key *key, +extern int key_update(key_ref_t key, const void *payload, size_t plen); @@ -243,9 +287,9 @@ extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, extern int keyring_clear(struct key *keyring); -extern struct key *keyring_search(struct key *keyring, - struct key_type *type, - const char *description); +extern key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description); extern int keyring_add_key(struct key *keyring, struct key *key); @@ -285,6 +329,10 @@ extern void key_init(void); #define key_serial(k) 0 #define key_get(k) ({ NULL; }) #define key_put(k) do { } while(0) +#define key_ref_put(k) do { } while(0) +#define make_key_ref(k) ({ NULL; }) +#define key_ref_to_ptr(k) ({ NULL; }) +#define is_key_possessed(k) 0 #define alloc_uid_keyring(u) 0 #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(t, k) ({ NULL; }) -- cgit v1.2.1 From aa55a08687059aa169d10a313c41f238c2070488 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 29 Sep 2005 19:58:53 +0400 Subject: [PATCH] fix TASK_STOPPED vs TASK_NONINTERACTIVE interaction do_signal_stop: for_each_thread(t) { if (t->state < TASK_STOPPED) ++sig->group_stop_count; } However, TASK_NONINTERACTIVE > TASK_STOPPED, so this loop will not count TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE threads. See also wait_task_stopped(), which checks ->state > TASK_STOPPED. Signed-off-by: Oleg Nesterov [ We really probably should always use the appropriate bitmasks to test task states, not do it like this. 
Using something like #define TASK_RUNNABLE (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | TASK_NONINTERACTIVE) and then doing "if (task->state & TASK_RUNNABLE)" or similar. But the ordering of the task states is historical, and keeping the ordering does make sense regardless. ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 49e617fa0f66..afe6c61f13e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -110,11 +110,11 @@ extern unsigned long nr_iowait(void); #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_TRACED 8 -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 -#define TASK_NONINTERACTIVE 64 +#define TASK_NONINTERACTIVE 4 +#define TASK_STOPPED 8 +#define TASK_TRACED 16 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.1 From 4a8342d233a39ee582e9f7260e12d2f5fd194a05 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Sep 2005 15:18:21 -0700 Subject: Revert task flag re-ordering, add comments Roland points out that the flags end up having non-obvious dependencies elsewhere, so revert aa55a08687059aa169d10a313c41f238c2070488 and add some comments about why things are as they are. We'll just have to fix up the broken comparisons. Roland has a patch. Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index afe6c61f13e5..c3ba31f210a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -107,14 +107,26 @@ extern unsigned long nr_iowait(void); #include +/* + * Task state bitmask. NOTE! 
These bits are also + * encoded in fs/proc/array.c: get_task_state(). + * + * We have two separate sets of flags: task->state + * is about runnability, while task->exit_state are + * about the task exiting. Confusing, but this way + * modifying one set can't modify the other one by + * mistake. + */ #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_NONINTERACTIVE 4 -#define TASK_STOPPED 8 -#define TASK_TRACED 16 -#define EXIT_ZOMBIE 32 -#define EXIT_DEAD 64 +#define TASK_STOPPED 4 +#define TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_ZOMBIE 16 +#define EXIT_DEAD 32 +/* in tsk->state again */ +#define TASK_NONINTERACTIVE 64 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) -- cgit v1.2.1 From 897f15fb587fd2772b9e7ff6ec0265057f3c3975 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 30 Sep 2005 11:58:55 -0700 Subject: [PATCH] aio: remove unlocked task_list test and resulting race Only one of the run or kick path is supposed to put an iocb on the run list. If both of them do it then one of them can end up referencing a freed iocb. The kick path could delete the task_list item from the wait queue before getting the ctx_lock and putting the iocb on the run list. The run path was testing the task_list item outside the lock so that it could catch ki_retry methods that return -EIOCBRETRY *without* putting the iocb on a wait queue and promising to call kick_iocb. This unlocked check could then race with the kick path to cause both to try and put the iocb on the run list. The patch stops the run path from testing task_list by requiring that any ki_retry that returns -EIOCBRETRY *must* guarantee that kick_iocb() will be called in the future. aio_p{read,write}, the only in-tree -EIOCBRETRY users, are updated.
Signed-off-by: Zach Brown Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index a4d5af907f90..60def658b246 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -43,6 +43,40 @@ struct kioctx; #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) +/* is there a better place to document function pointer methods? */ +/** + * ki_retry - iocb forward progress callback + * @kiocb: The kiocb struct to advance by performing an operation. + * + * This callback is called when the AIO core wants a given AIO operation + * to make forward progress. The kiocb argument describes the operation + * that is to be performed. As the operation proceeds, perhaps partially, + * ki_retry is expected to update the kiocb with progress made. Typically + * ki_retry is set in the AIO core and it itself calls file_operations + * helpers. + * + * ki_retry's return value determines when the AIO operation is completed + * and an event is generated in the AIO event ring. Except the special + * return values described below, the value that is returned from ki_retry + * is transferred directly into the completion ring as the operation's + * resulting status. Once this has happened ki_retry *MUST NOT* reference + * the kiocb pointer again. + * + * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete() + * will be called on the kiocb pointer in the future. The AIO core will + * not ask the method again -- ki_retry must ensure forward progress. + * aio_complete() must be called once and only once in the future, multiple + * calls may result in undefined behaviour. 
+ * + * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb() + * will be called on the kiocb pointer in the future. This may happen + * through generic helpers that associate kiocb->ki_wait with a wait + * queue head that ki_retry uses via current->io_wait. It can also happen + * with custom tracking and manual calls to kick_iocb(), though that is + * discouraged. In either case, kick_iocb() must be called once and only + * once. ki_retry must ensure forward progress, the AIO core will wait + * indefinitely for kick_iocb() to be called. + */ struct kiocb { struct list_head ki_run_list; long ki_flags; -- cgit v1.2.1 From fd2e54b35bd70d11c160ded4834e2378e915356e Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 1 Oct 2005 17:00:48 +0200 Subject: [PATCH] trivial #if -> #ifdef Use '#ifdef' consistently on __KERNEL__. This was reported as bug #5340 (isn't easier to send a fix than report the bug?!) Signed-off-by: Diego Calleja Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4ed2107bc020..2f0299a448f6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -183,7 +183,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#if __KERNEL__ +#ifdef __KERNEL__ void *data; #else kernel_ulong_t data; -- cgit v1.2.1 From 325ed8239309cb29f10ea58c5a668058ead11479 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 13:57:23 -0700 Subject: [NET]: Fix packet timestamping. I've found the problem in general. It affects any 64-bit architecture. The problem occurs when you change the system time. Suppose that when you boot your system clock is forward by a day. This gets recorded down in skb_tv_base. You then wind the clock back by a day. 
From that point onwards the offset will be negative which essentially overflows the 32-bit variables they're stored in. In fact, why don't we just store the real time stamp in those 32-bit variables? After all, we're not going to overflow for quite a while yet. When we do overflow, we'll need a better solution of course. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2741c0c55e83..466c879f82b8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,8 +155,6 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) -extern struct timeval skb_tv_base; - struct skb_timeval { u32 off_sec; u32 off_usec; @@ -175,7 +173,7 @@ enum { * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @tstamp: Time we arrived stored as offset to skb_tv_base + * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -1255,10 +1253,6 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; - if (skb->tstamp.off_sec) { - stamp->tv_sec += skb_tv_base.tv_sec; - stamp->tv_usec += skb_tv_base.tv_usec; - } } /** @@ -1272,8 +1266,8 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * */ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { - skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; - skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; + skb->tstamp.off_sec = stamp->tv_sec; + skb->tstamp.off_usec = stamp->tv_usec; } extern void __net_timestamp(struct sk_buff *skb); -- cgit v1.2.1 From 
81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesn't suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the beginning of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, which is far away from the beginning of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us to immediately skip the element without touching a second cache line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 5 +++-- include/linux/tc_ematch/tc_em_meta.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bb6f88e14061..e0b922785d98 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -372,8 +372,9 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 081b1ee8516e..e21937cf91d0 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -71,7 +71,7 @@ enum TCF_META_ID_SK_SNDBUF, TCF_META_ID_SK_ALLOCS, TCF_META_ID_SK_ROUTE_CAPS, - TCF_META_ID_SK_HASHENT, + TCF_META_ID_SK_HASH, TCF_META_ID_SK_LINGERTIME, TCF_META_ID_SK_ACK_BACKLOG, TCF_META_ID_SK_MAX_ACK_BACKLOG, -- cgit v1.2.1 From e5ed639913eea3e4783a550291775ab78dd84966 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 3 Oct 2005 14:35:55 -0700 Subject: [IPV4]: Replace __in_dev_get with __in_dev_get_rcu/rtnl The following patch renames __in_dev_get() to __in_dev_get_rtnl() and introduces __in_dev_get_rcu() to cover the second case. 1) RCU with refcnt should use in_dev_get(). 2) RCU without refcnt should use __in_dev_get_rcu(). 3) All others must hold RTNL and use __in_dev_get_rtnl(). There is one exception in net/ipv4/route.c which is in fact a pre-existing race condition. 
I've marked it as such so that we remember to fix it. This patch is based on suggestions and prior work by Suzanne Wood and Paul McKenney. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7e1e15f934f3..fd7af86151b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -142,13 +142,21 @@ static __inline__ int bad_mask(u32 mask, u32 addr) #define endfor_ifa(in_dev) } +static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = dev->ip_ptr; + if (in_dev) + in_dev = rcu_dereference(in_dev); + return in_dev; +} + static __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); - in_dev = dev->ip_ptr; + in_dev = __in_dev_get_rcu(dev); if (in_dev) atomic_inc(&in_dev->refcnt); rcu_read_unlock(); @@ -156,7 +164,7 @@ in_dev_get(const struct net_device *dev) } static __inline__ struct in_device * -__in_dev_get(const struct net_device *dev) +__in_dev_get_rtnl(const struct net_device *dev) { return (struct in_device*)dev->ip_ptr; } -- cgit v1.2.1 From ce0fe7e70a0ad11097a3773e9f3f0de3d859edf0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Oct 2005 17:43:06 +0100 Subject: [PATCH] bfs endianness annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/bfs_fs.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index c1237aa92e38..8ed6dfdcd783 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -20,19 +20,19 @@ /* BFS inode layout on disk */ struct bfs_inode { - __u16 i_ino; + __le16 i_ino; __u16 i_unused; - __u32 i_sblock; - 
__u32 i_eblock; - __u32 i_eoffset; - __u32 i_vtype; - __u32 i_mode; - __s32 i_uid; - __s32 i_gid; - __u32 i_nlink; - __u32 i_atime; - __u32 i_mtime; - __u32 i_ctime; + __le32 i_sblock; + __le32 i_eblock; + __le32 i_eoffset; + __le32 i_vtype; + __le32 i_mode; + __le32 i_uid; + __le32 i_gid; + __le32 i_nlink; + __le32 i_atime; + __le32 i_mtime; + __le32 i_ctime; __u32 i_padding[4]; }; @@ -41,17 +41,17 @@ struct bfs_inode { #define BFS_DIRS_PER_BLOCK 32 struct bfs_dirent { - __u16 ino; + __le16 ino; char name[BFS_NAMELEN]; }; /* BFS superblock layout on disk */ struct bfs_super_block { - __u32 s_magic; - __u32 s_start; - __u32 s_end; - __s32 s_from; - __s32 s_to; + __le32 s_magic; + __le32 s_start; + __le32 s_end; + __le32 s_from; + __le32 s_to; __s32 s_bfrom; __s32 s_bto; char s_fsname[6]; @@ -66,15 +66,15 @@ struct bfs_super_block { #define BFS_INO2OFF(ino) \ ((__u32)(((ino) - BFS_ROOT_INO) * sizeof(struct bfs_inode)) + BFS_BSIZE) #define BFS_NZFILESIZE(ip) \ - ((cpu_to_le32((ip)->i_eoffset) + 1) - cpu_to_le32((ip)->i_sblock) * BFS_BSIZE) + ((le32_to_cpu((ip)->i_eoffset) + 1) - le32_to_cpu((ip)->i_sblock) * BFS_BSIZE) #define BFS_FILESIZE(ip) \ ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) #define BFS_FILEBLOCKS(ip) \ - ((ip)->i_sblock == 0 ? 0 : (cpu_to_le32((ip)->i_eblock) + 1) - cpu_to_le32((ip)->i_sblock)) + ((ip)->i_sblock == 0 ? 
0 : (le32_to_cpu((ip)->i_eblock) + 1) - le32_to_cpu((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((cpu_to_le32(bfs_sb->s_from) != -1) && (cpu_to_le32(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + ((le32_to_cpu(bfs_sb->s_from) != -1) && (le32_to_cpu(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) #endif /* _LINUX_BFS_FS_H */ -- cgit v1.2.1 From 7b5b3f3d826ea87c224c66de9c95c09e7f110ecd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:38:44 -0700 Subject: [ATM]: fix sparse gfp nocast warnings Fix implicit nocast warnings in atm code: net/atm/atm_misc.c:35:44: warning: implicit cast to nocast type drivers/atm/fore200e.c:183:33: warning: implicit cast to nocast type Also use kzalloc() instead of kmalloc(). Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9f374cfa1b05..f1fd849e5535 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -457,7 +457,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags); + unsigned int __nocast gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); -- cgit v1.2.1 From 17b698856328a42d5874ac87640e2cd84a824eef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:41:16 -0700 Subject: [CONNECTOR]: fix sparse gfp nocast warnings Fix implicit nocast warnings in connector code: drivers/connector/connector.c:102:24: warning: implicit cast to nocast type drivers/connector/connector.c:114:45: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/connector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 86d4b0a81713..96582c9911ac 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, int); +int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); -- cgit v1.2.1 From 3d2aef668920e8d93b77f145f8f647f62abe75db Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Oct 2005 22:45:14 -0700 Subject: [TEXTSEARCH]: fix sparse gfp nocast warnings Fix nocast sparse warnings: include/linux/textsearch.h:165:57: warning: implicit cast to nocast type Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/linux/textsearch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 941f45ac117a..1a4990e448e9 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -158,7 +158,8 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGNTO 8 #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) -static inline struct ts_config *alloc_ts_config(size_t payload, int gfp_mask) +static inline struct ts_config *alloc_ts_config(size_t payload, + unsigned int __nocast gfp_mask) { struct ts_config *conf; -- cgit v1.2.1 From 0f21ba7cc3320d33459ecb3f538f1a42040c29cd Mon Sep 17 00:00:00 2001 From: Eric Kinzie Date: Thu, 6 Oct 2005 22:19:28 -0700 Subject: [ATM]: add support for LECS addresses learned from network From: Eric Kinzie Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- include/linux/atmdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f1fd849e5535..aca9b344bd35 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -76,6 +76,13 @@ struct atm_dev_stats { /* set interface ESI */ #define ATM_SETESIF _IOW('a',ATMIOC_ITF+13,struct atmif_sioc) /* force interface ESI */ +#define ATM_ADDLECSADDR _IOW('a', ATMIOC_ITF+14, struct atmif_sioc) + /* register a LECS address */ +#define ATM_DELLECSADDR _IOW('a', ATMIOC_ITF+15, struct atmif_sioc) + /* unregister a LECS address */ +#define ATM_GETLECSADDR _IOW('a', ATMIOC_ITF+16, struct atmif_sioc) + /* retrieve LECS address(es) */ + #define ATM_GETSTAT _IOW('a',ATMIOC_SARCOM+0,struct atmif_sioc) /* get AAL layer statistics */ #define ATM_GETSTATZ _IOW('a',ATMIOC_SARCOM+1,struct atmif_sioc) @@ -328,6 +335,8 @@ struct atm_dev_addr { struct list_head entry; /* next address */ }; +enum atm_addr_type_t { ATM_ADDR_LOCAL, 
ATM_ADDR_LECS }; + struct atm_dev { const struct atmdev_ops *ops; /* device operations; NULL if unused */ const struct atmphy_ops *phy; /* PHY operations, may be undefined */ @@ -338,6 +347,7 @@ struct atm_dev { void *phy_data; /* private PHY date */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ + struct list_head lecs; /* LECS ATM addresses learned via ILMI */ unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */ struct atm_cirange ci_range; /* VPI/VCI range */ struct k_atm_dev_stats stats; /* statistics */ -- cgit v1.2.1 From 468ed2b0c85ec4310b429e60358213b6d077289e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Oct 2005 15:07:38 +0100 Subject: [PATCH] Keys: Split key permissions checking into a .c file The attached patch splits key permissions checking out of key-ui.h and moves it into a .c file. It's quite large and called quite a lot, and it's about to get bigger with the addition of LSM support for keys... key_any_permission() is also discarded as it's no longer used. 
Signed-Off-By: David Howells Signed-off-by: Linus Torvalds --- include/linux/key-ui.h | 91 +++----------------------------------------------- 1 file changed, 5 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h index 918c34a8347e..7a2e332067c3 100644 --- a/include/linux/key-ui.h +++ b/include/linux/key-ui.h @@ -38,97 +38,16 @@ struct keyring_list { struct key *keys[0]; }; - /* * check to see whether permission is granted to use a key in the desired way */ -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; -} - -/* - * check to see whether permission is granted to use a key in at least one of - * the desired ways - */ -static inline int key_any_permission(const key_ref_t key_ref, key_perm_t perm) -{ - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) - kperm = key->perm >> 24; - else if (key->uid == current->fsuid) - kperm = key->perm >> 16; - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && - in_group_p(key->gid) - ) - kperm = key->perm >> 8; - else - kperm = key->perm; +extern int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm); - kperm = kperm & perm & KEY_ALL; - - return kperm != 0; -} - -static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid) -{ - int ret; - - task_lock(tsk); - ret = groups_search(tsk->group_info, gid); - task_unlock(tsk); - return ret; -} - -static inline int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - 
key_perm_t perm) +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - struct key *key = key_ref_to_ptr(key_ref); - key_perm_t kperm; - - if (is_key_possessed(key_ref)) { - kperm = key->perm >> 24; - } - else if (key->uid == context->fsuid) { - kperm = key->perm >> 16; - } - else if (key->gid != -1 && - key->perm & KEY_GRP_ALL && ( - key->gid == context->fsgid || - key_task_groups_search(context, key->gid) - ) - ) { - kperm = key->perm >> 8; - } - else { - kperm = key->perm; - } - - kperm = kperm & perm & KEY_ALL; - - return kperm == perm; - + return key_task_permission(key_ref, current, perm); } extern key_ref_t lookup_user_key(struct task_struct *context, -- cgit v1.2.1 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. 
Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/atmdev.h | 2 +- include/linux/bio.h | 6 +++--- include/linux/buffer_head.h | 2 +- include/linux/connector.h | 2 +- include/linux/cpuset.h | 5 ++--- include/linux/dmapool.h | 2 +- include/linux/gfp.h | 14 +++++++------- include/linux/jbd.h | 2 +- include/linux/kfifo.h | 4 ++-- include/linux/mempool.h | 9 ++++----- include/linux/netlink.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/posix_acl.h | 6 +++--- include/linux/radix-tree.h | 2 +- include/linux/security.h | 6 ++---- include/linux/skbuff.h | 28 ++++++++++++++-------------- include/linux/slab.h | 19 +++++++++---------- include/linux/string.h | 2 +- include/linux/swap.h | 2 +- include/linux/textsearch.h | 2 +- include/linux/types.h | 4 ++++ include/linux/vmalloc.h | 4 ++-- 22 files changed, 63 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index aca9b344bd35..e7d0593bb576 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -467,7 +467,7 @@ static inline void atm_dev_put(struct atm_dev *dev) int atm_charge(struct atm_vcc *vcc,int truesize); struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - unsigned int __nocast gfp_flags); + gfp_t gfp_flags); int atm_pcr_goal(struct atm_trafprm *tp); void vcc_release_async(struct atm_vcc *vcc, int reply); diff --git a/include/linux/bio.h b/include/linux/bio.h index 6e1c79c8b6bf..3344b4e8e43a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -276,8 +276,8 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int, int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(unsigned int __nocast, int); -extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); +extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio 
*); extern void bio_free(struct bio *, struct bio_set *); @@ -287,7 +287,7 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, unsigned int __nocast); +extern struct bio *bio_clone(struct bio *, gfp_t); extern void bio_init(struct bio *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 90828493791f..6a1d154c0825 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -172,7 +172,7 @@ void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); -struct buffer_head *alloc_buffer_head(unsigned int __nocast gfp_flags); +struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void FASTCALL(unlock_buffer(struct buffer_head *bh)); void FASTCALL(__lock_buffer(struct buffer_head *bh)); diff --git a/include/linux/connector.h b/include/linux/connector.h index 96582c9911ac..95952cc1f525 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -149,7 +149,7 @@ struct cn_dev { int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, unsigned int __nocast); +int cn_netlink_send(struct cn_msg *, u32, gfp_t); int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 24062a1dbf61..6e2deef96b34 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void 
cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -49,8 +49,7 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z, - unsigned int __nocast gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; } diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 4932ee5c77f0..76f12f46db7f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -19,7 +19,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, void dma_pool_destroy(struct dma_pool *pool); -void *dma_pool_alloc(struct dma_pool *pool, unsigned int __nocast mem_flags, +void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4dc990f3b5cc..3010e172394d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,9 +85,9 @@ static inline void arch_free_page(struct page *page, int order) { } #endif extern struct page * -FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); +FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); -static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask, +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) @@ -98,17 +98,17 @@ static inline struct page 
*alloc_pages_node(int nid, unsigned int __nocast gfp_m } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order); +extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order); static inline struct page * -alloc_pages(unsigned int __nocast gfp_mask, unsigned int order) +alloc_pages(gfp_t gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, +extern struct page *alloc_page_vma(gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ @@ -117,8 +117,8 @@ extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) diff --git a/include/linux/jbd.h b/include/linux/jbd.h index de097269bd7f..ff853b3173c6 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -935,7 +935,7 @@ void journal_put_journal_head(struct journal_head *jh); */ extern kmem_cache_t *jbd_handle_cache; -static inline handle_t *jbd_alloc_handle(unsigned int __nocast gfp_flags) +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { return kmem_cache_alloc(jbd_handle_cache, gfp_flags); } diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c27cd428d269..48eccd865bd8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -35,8 +35,8 @@ struct kfifo { }; extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t 
*lock); -extern struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, + gfp_t gfp_mask, spinlock_t *lock); +extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 796220ce47cc..f2427d7394b0 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -6,7 +6,7 @@ #include -typedef void * (mempool_alloc_t)(unsigned int __nocast gfp_mask, void *pool_data); +typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); typedef struct mempool_s { @@ -26,17 +26,16 @@ extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, int nid); -extern int mempool_resize(mempool_t *pool, int new_min_nr, - unsigned int __nocast gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); extern void mempool_destroy(mempool_t *pool); -extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask); +extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from * a slab that is passed in through pool_data. 
*/ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data); +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bdebdc564506..ba25ca874c20 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,7 +131,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, unsigned int __nocast allocation); + __u32 group, gfp_t allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9a25647a295..acbf31c154f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -19,7 +19,7 @@ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ -static inline unsigned int __nocast mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->flags & __GFP_BITS_MASK; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 4caedddaa033..4bc241290c24 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,11 +71,11 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ -extern struct posix_acl *posix_acl_alloc(int, unsigned int __nocast); -extern struct posix_acl *posix_acl_clone(const struct posix_acl *, unsigned int __nocast); +extern struct posix_acl 
*posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, unsigned int __nocast); +extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); extern int posix_acl_create_masq(struct posix_acl *, mode_t *); extern int posix_acl_chmod_masq(struct posix_acl *, mode_t); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9c51917b1cce..045d4761febc 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -50,7 +50,7 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -int radix_tree_preload(unsigned int __nocast gfp_mask); +int radix_tree_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/include/linux/security.h b/include/linux/security.h index 0e43460d374e..627382e74057 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2634,8 +2634,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2752,8 +2751,7 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, - unsigned int __nocast priority) +static 
inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 466c879f82b8..8f5d9e7f8734 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -302,37 +302,37 @@ struct sk_buff { extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - unsigned int __nocast priority, int fclone); + gfp_t priority, int fclone); static inline struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 0); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, - unsigned int __nocast priority) + gfp_t priority) { return __alloc_skb(size, priority, 1); } extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast priority); + gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *pskb_copy(struct sk_buff *skb, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask); + gfp_t gfp_mask); extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, @@ -484,7 +484,7 @@ static inline int skb_shared(const struct sk_buff *skb) * NULL is returned on a memory allocation failure. 
*/ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_shared(skb)) { @@ -516,7 +516,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, - unsigned int __nocast pri) + gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) { @@ -1017,7 +1017,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * %NULL is returned in there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); if (likely(skb)) @@ -1130,8 +1130,8 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. 
*/ -extern int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp); -static inline int skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp) +extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); +static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) { return __skb_linearize(skb, gfp); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f356f3bbc64..5fc04a16ecb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -61,11 +61,11 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo void (*)(void *, kmem_cache_t *, unsigned long)); extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); +extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -74,9 +74,9 @@ struct cache_sizes { kmem_cache_t *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; -extern void *__kmalloc(size_t, unsigned int __nocast); +extern void *__kmalloc(size_t, gfp_t); -static inline void *kmalloc(size_t size, unsigned int __nocast flags) +static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; @@ -99,7 +99,7 @@ found: return __kmalloc(size, flags); } -extern void *kzalloc(size_t, unsigned int __nocast); +extern void *kzalloc(size_t, gfp_t); /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -107,7 +107,7 @@ extern void *kzalloc(size_t, unsigned int __nocast); * @size: element size. * @flags: the type of memory to allocate. 
*/ -static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { if (n != 0 && size > INT_MAX / n) return NULL; @@ -118,15 +118,14 @@ extern void kfree(const void *); extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(kmem_cache_t *, - unsigned int __nocast flags, int node); -extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); +extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); +extern void *kmalloc_node(size_t size, gfp_t flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { return kmem_cache_alloc(cachep, flags); } -static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); } diff --git a/include/linux/string.h b/include/linux/string.h index dab2652acbd8..369be3264a55 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, unsigned int __nocast gfp); +extern char *kstrdup(const char *s, gfp_t gfp); #ifdef __cplusplus } diff --git a/include/linux/swap.h b/include/linux/swap.h index 3c9ff0048153..a7bf1a3b1496 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -147,7 +147,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask, int order); +extern void out_of_memory(gfp_t gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 
1a4990e448e9..515046d1b2f4 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -159,7 +159,7 @@ extern unsigned int textsearch_find_continuous(struct ts_config *, #define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1)) static inline struct ts_config *alloc_ts_config(size_t payload, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct ts_config *conf; diff --git a/include/linux/types.h b/include/linux/types.h index 2b678c22ca4a..0aee34f9da9f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -165,6 +165,10 @@ typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; #endif +#ifdef __KERNEL__ +typedef unsigned __nocast gfp_t; +#endif + struct ustat { __kernel_daddr_t f_tfree; __kernel_ino_t f_tinode; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b244f69ef682..3701a0673d2c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -34,8 +34,8 @@ struct vm_struct { extern void *vmalloc(unsigned long size); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); -extern void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot); -extern void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, -- cgit v1.2.1 From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Oct 2005 21:19:40 +0200 Subject: [PATCH] x86_64: Set up safe page tables during resume The following patch makes swsusp avoid the possible temporary corruption of page translation tables during resume on x86-64. 
This is achieved by creating a copy of the relevant page tables that will not be modified by swsusp and can be safely used by it on resume. The problem is that during resume on x86-64 swsusp may temporarily corrupt the page tables used for the direct mapping of RAM. If that happens, a page fault occurs and cannot be handled properly, which leads to the solid hang of the affected system. This leads to the loss of the system's state from before suspend and may result in the loss of data or the corruption of filesystems, so it is a serious issue. Also, it appears to happen quite often (for me, as often as 50% of the time). The problem is related to the fact that (at least) one of the PMD entries used in the direct memory mapping (starting at PAGE_OFFSET) points to a page table the physical address of which is much greater than the physical address of the PMD entry itself. Moreover, unfortunately, the physical address of the page table before suspend (i.e. the one stored in the suspend image) happens to be different to the physical address of the corresponding page table used during resume (i.e. the one that is valid right before swsusp_arch_resume() in arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is restored, the "offending" PMD entry gets overwritten, so it does not point to the right physical address any more (i.e. there's no page table at the address pointed to by it, because it points to the address the page table has been at during suspend). Consequently, if the PMD entry is used later on, and it _is_ used in the process of copying the image pages, a page fault occurs, but it cannot be handled in the normal way and the system hangs. In principle we can call create_resume_mapping() from swsusp_arch_resume() (ie. 
from suspend_asm.S), but then the memory allocations in create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping() must be made carefully so that we use _only_ NosaveFree pages in them (the other pages are overwritten by the loop in swsusp_arch_resume()). Additionally, we are in atomic context at that time, so we cannot use GFP_KERNEL. Moreover, if one of the allocations fails, we should free all of the allocated pages, so we need to trace them somehow. All of this is done in the appended patch, except that the functions populating the page tables are located in arch/x86_64/kernel/suspend.c rather than in init.c. It may be done in a more elegant way in the future, with the help of some swsusp patches that are in the works now. [AK: move some externs into headers, renamed a function] Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f2e96fdfaae0..ad15a54806d8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -71,5 +71,7 @@ void restore_processor_state(void); struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +extern unsigned long get_usable_page(unsigned gfp_mask); +extern void free_eaten_memory(void); #endif /* _LINUX_SWSUSP_H */ -- cgit v1.2.1 From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 19:44:29 +0200 Subject: [PATCH] Fix signal sending in usbdevio on async URB completion If a process issues an URB from userspace and (starts to) terminate before the URB comes back, we run into the issue described above. This is because the urb saves a pointer to "current" when it is posted to the device, but there's no guarantee that this pointer is still valid afterwards.
In fact, there are three separate issues: 1) the pointer to "current" can become invalid, since the task could be completely gone when the URB completion comes back from the device. 2) Even if the saved task pointer is still pointing to a valid task_struct, task_struct->sighand could have gone meanwhile. 3) Even if the process is perfectly fine, permissions may have changed, and we can no longer send it a signal. So what we do instead, is to save the PID and uid's of the process, and introduce a new kill_proc_info_as_uid() function. Signed-off-by: Harald Welte [ Fixed up types and added symbol exports ] Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c3ba31f210a9..27519df0f987 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1018,6 +1018,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); extern int kill_proc_info(int, struct siginfo *, pid_t); +extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); -- cgit v1.2.1 From ebe0bbf06c9e03613bdcb6b5a704595a9344b7ff Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:19 -0700 Subject: [NETFILTER] nfnetlink: use highest bit of nfa_type to indicate nested TLV As Henrik Nordstrom pointed out, all our efforts with "split endian" (i.e. host byte order tags, net byte order values) are useless, unless a parser can determine whether an attribute is nested or not. This patch steals the highest bit of nfattr.nfa_type to indicate whether the data payload contains a nested nfattr (1) or not (0). 
This will break userspace compatibility, but luckily no kernel with nfnetlink was released so far. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d5b10ae2399..f08e870100f4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,11 +41,15 @@ enum nfnetlink_groups { struct nfattr { u_int16_t nfa_len; - u_int16_t nfa_type; + u_int16_t nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ } __attribute__ ((packed)); -/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time - * to put this in a generic file */ +/* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from + * rtnetlink.h, it's time to put this in a generic file */ + +#define NFNL_NFA_NEST 0x8000 +#define NFA_TYPE(attr) ((attr)->nfa_type & 0x7fff) #define NFA_ALIGNTO 4 #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) @@ -59,7 +63,7 @@ struct nfattr #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) #define NFA_NEST(skb, type) \ ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ - NFA_PUT(skb, type, 0, NULL); \ + NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \ __start; }) #define NFA_NEST_END(skb, start) \ ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ -- cgit v1.2.1 From b3a91d037a2575040f9b6a483f60c407a3d80368 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:52:36 -0700 Subject: [NETFILTER] nat: remove bogus structure member When 'rustynat' was merged in 2.6.12, the use of the "helper" pointer of struct ipt_nat_info was obsoleted, but the pointer not removed from the struct. This patch removes the pointer, thereby yet again shrinking struct ip_conntrack. 
Discovered-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e201ec6e9905..41a107de17cf 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -58,10 +58,6 @@ extern rwlock_t ip_nat_lock; struct ip_nat_info { struct list_head bysource; - - /* Helper (NULL if none). */ - struct ip_nat_helper *helper; - struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; -- cgit v1.2.1 From 5bbc243aafff9ad653dc7a9fa7bcaf0b4631355a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 20:54:01 -0700 Subject: [NETFILTER]: Add missing include to ip_conntrack_tuple.h Without this #include, __be16 is not defined and userspace programs will break. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 20e43f018b7c..3232db11a4e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -1,6 +1,8 @@ #ifndef _IP_CONNTRACK_TUPLE_H #define _IP_CONNTRACK_TUPLE_H +#include + /* A `tuple' is a structure containing the information to uniquely identify a connection. ie. if two packets have the same tuple, they are in the same connection; if not, they are not. -- cgit v1.2.1 From e1c73b78e3706bd3c336d4730a01dd4081dfb7ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 20:55:49 -0700 Subject: [NETFILTER] ctnetlink: add one nesting level for TCP state To keep consistency, the TCP private protocol information is nested attributes under CTA_PROTOINFO_TCP. 
This way the sequence of attributes to access the TCP state information looks like here below: CTA_PROTOINFO CTA_PROTOINFO_TCP CTA_PROTOINFO_TCP_STATE instead of: CTA_PROTOINFO CTA_PROTOINFO_TCP_STATE Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 5c55751c78e4..fb5511030185 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -70,11 +70,18 @@ enum ctattr_l4proto { enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, - CTA_PROTOINFO_TCP_STATE, + CTA_PROTOINFO_TCP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) +enum ctattr_protoinfo_tcp { + CTA_PROTOINFO_TCP_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_TCP_MAX +}; +#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, -- cgit v1.2.1 From a051a8f7306476af0a74370ad56e793cb6c43bf7 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 21:21:10 -0700 Subject: [NETFILTER]: Use only 32bit counters for CONNTRACK_ACCT Initially we used 64bit counters for conntrack-based accounting, since we had no event mechanism to tell userspace that our counters are about to overflow. With nfnetlink_conntrack, we now have such an event mechanism and thus can save 16 bytes per connection. Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 6 ++++-- include/linux/netfilter_ipv4/ip_conntrack.h | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb5511030185..116fcaced909 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -84,8 +84,10 @@ enum ctattr_protoinfo_tcp { enum ctattr_counters { CTA_COUNTERS_UNSPEC, - CTA_COUNTERS_PACKETS, - CTA_COUNTERS_BYTES, + CTA_COUNTERS_PACKETS, /* old 64bit counters */ + CTA_COUNTERS_BYTES, /* old 64bit counters */ + CTA_COUNTERS32_PACKETS, + CTA_COUNTERS32_BYTES, __CTA_COUNTERS_MAX }; #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ced38736813..d078bb91d9e5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -117,6 +117,10 @@ enum ip_conntrack_events /* NAT info */ IPCT_NATINFO_BIT = 10, IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), + + /* Counter highest bit has been set */ + IPCT_COUNTER_FILLING_BIT = 11, + IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), }; enum ip_conntrack_expect_events { @@ -192,8 +196,8 @@ do { \ struct ip_conntrack_counter { - u_int64_t packets; - u_int64_t bytes; + u_int32_t packets; + u_int32_t bytes; }; struct ip_conntrack_helper; -- cgit v1.2.1 From 339231537506846cb232a2f0cc4a2c662b2d5b07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Oct 2005 21:23:28 -0700 Subject: [NETFILTER] ctnetlink: allow userspace to change TCP state This patch adds the ability of changing the state of a TCP connection. I know that this must be used with care but it's required to provide a complete conntrack creation via conntrack_netlink. So I'll document this aspect in the upcoming docs.
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_protocol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index b6b99be8632a..2c76b879e3dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -52,6 +52,9 @@ struct ip_conntrack_protocol int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct); + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct); + int (*tuple_to_nfattr)(struct sk_buff *skb, const struct ip_conntrack_tuple *t); int (*nfattr_to_tuple)(struct nfattr *tb[], -- cgit v1.2.1 From afb997c6163b33292d31a09d6aa5cbb03ffa5bf1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 12 Oct 2005 15:12:21 -0700 Subject: [NETPOLL]: wrong return for null netpoll_poll_lock() When netpoll is not being used, the macro that defines the removed routine netpoll_poll_lock defines the return as zero, but the real routine returns a `void *` Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 5ade54a78dbb..ca5a8733000f 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -86,7 +86,7 @@ static inline void netpoll_poll_unlock(void *have) #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) 0 +#define netpoll_poll_lock(a) NULL #define netpoll_poll_unlock(a) #endif -- cgit v1.2.1 From c8923c6b852d3a97c1faad0566e38fca330375a7 Mon Sep 17 00:00:00 2001 From: "David S.
Miller" Date: Thu, 13 Oct 2005 14:41:23 -0700 Subject: [NETFILTER]: Fix OOPSes on machines with discontiguous cpu numbering. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original patch by Harald Welte, with feedback from Herbert Xu and testing by Sébastien Bernard. EBTABLES, ARP tables, and IP/IP6 tables all assume that cpus are numbered linearly. That is not necessarily true. This patch fixes that up by calculating the largest possible cpu number, and allocating enough per-cpu structure space given that. Signed-off-by: David S. Miller --- include/linux/cpumask.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b15826f6e3a2..fe9778301d07 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -392,4 +392,16 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) +/* Find the highest possible smp_processor_id() */ +static inline unsigned int highest_possible_processor_id(void) +{ + unsigned int cpu, highest = 0; + + for_each_cpu_mask(cpu, cpu_possible_map) + highest = cpu; + + return highest; +} + + #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.1 From e26148d934762b61133a64b6862f870624ff617d Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Fri, 14 Oct 2005 15:59:05 -0700 Subject: [PATCH] Fix copy-and-paste error in BSD accounting Fix copy and paste error in jiffies_to_AHZ conversion which leads to wrong BSD accounting information on alpha and ia64 when CONFIG_BSD_PROCESS_ACCT_V3 is turned on. Also update comment to match reorganised header files. 
Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acct.h b/include/linux/acct.h index 1993a3691768..19f70462b3be 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -162,13 +162,13 @@ typedef struct acct acct_t; #ifdef __KERNEL__ /* * Yet another set of HZ to *HZ helper functions. - * See <linux/times.h> for the original. + * See <linux/jiffies.h> for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 - return x / (HZ / USER_HZ); + return x / (HZ / AHZ); #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); -- cgit v1.2.1 From 688ce17b8599abc548b406c00e4d18ae0dec954f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Oct 2005 00:17:33 -0700 Subject: [PATCH]: highest_possible_processor_id() has to be a macro ... otherwise, things like alpha and sparc64 break and break badly. They define cpu_possible_map to something else in smp.h *AFTER* having included cpumask.h. If that puppy is a macro, expansion will happen at the actual caller, when we'd already seen #define cpu_possible_map ... and we will get the right thing used. As an inline helper it will be tokenized before we get to that define and that's it; no matter what we define later, it won't affect anything. We get modules with dependency on cpu_possible_map instead of the right symbol (phys_cpu_present_map in case of sparc64), or outright link errors if they are built-in. Signed-off-by: Al Viro Signed-off-by: David S.
Miller --- include/linux/cpumask.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe9778301d07..9bdba8169b41 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map; #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) /* Find the highest possible smp_processor_id() */ -static inline unsigned int highest_possible_processor_id(void) -{ - unsigned int cpu, highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - - return highest; -} +#define highest_possible_processor_id() \ +({ \ + unsigned int cpu, highest = 0; \ + for_each_cpu_mask(cpu, cpu_possible_map) \ + highest = cpu; \ + highest; \ +}) #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.1 From b24d18aa743dad0c42918157c5d717686269d3a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Oct 2005 20:29:20 -0700 Subject: [PATCH] list: add missing rcu_dereference on first element It seems that all the list_*_rcu primitives are missing a memory barrier on the very first dereference. For example, #define list_for_each_rcu(pos, head) \ for (pos = (head)->next; prefetch(pos->next), pos != (head); \ pos = rcu_dereference(pos->next)) It will go something like: pos = (head)->next prefetch(pos->next) pos != (head) do stuff We're missing a barrier here. pos = rcu_dereference(pos->next) fetch pos->next barrier given by rcu_dereference(pos->next) store pos Without the missing barrier, the pos->next value may turn out to be stale. In fact, if "do stuff" were also dereferencing pos and relying on list_for_each_rcu to provide the barrier then it may also break. So here is a patch to make sure that we have a barrier for the first element in the list. Signed-off-by: Herbert Xu Acked-by: "Paul E. 
McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index e6ec59682274..084971f333fe 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -442,12 +442,14 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) /** * list_for_each_safe_rcu - iterate over an rcu-protected list safe @@ -461,8 +463,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = rcu_dereference(n), n = pos->next) + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -474,11 +477,11 @@ static inline void list_splice_init(struct list_head *list, * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). 
*/ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = rcu_dereference(list_entry(pos->member.next, \ - typeof(*pos), member))) +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -492,8 +495,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) /* * Double linked lists with a single pointer list head. @@ -696,8 +700,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, pos = n) #define hlist_for_each_rcu(pos, head) \ - for ((pos) = (head)->first; pos && ({ prefetch((pos)->next); 1; }); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (head)->first; \ + rcu_dereference((pos)) && ({ prefetch((pos)->next); 1; }); \ + (pos) = (pos)->next) /** * hlist_for_each_entry - iterate over list of given type @@ -762,9 +767,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = pos->next) #else #warning "don't include kernel headers in userspace" -- cgit v1.2.1 From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2005 20:01:21 
+0200 Subject: [PATCH] rcu: keep rcu callback event counter This makes call_rcu() keep track of how many events there are on the RCU list, and cause a reschedule event when the list gets too long. This helps keep RCU event lists down. Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4e65eb44adfd..70191a5a148f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -94,6 +94,7 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; + long count; /* # of queued items */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; -- cgit v1.2.1 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. 
Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 60def658b246..0decf66117c1 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,7 +24,12 @@ struct kioctx; #define KIOCB_SYNC_KEY (~0U) /* ki_flags bits */ -#define KIF_LOCKED 0 +/* + * This may be used for cancel/retry serialization in the future, but + * for now it's unused and we probably don't want modules to even + * think they can use it. + */ +/* #define KIF_LOCKED 0 */ #define KIF_KICKED 1 #define KIF_CANCELLED 2 -- cgit v1.2.1