diff options
Diffstat (limited to 'net')
613 files changed, 27370 insertions, 16175 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index 282c4ab1abe6..2a27e37bc4cb 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -10,7 +10,6 @@ * v 1.0 03/22/99 */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/types.h> diff --git a/net/802/fddi.c b/net/802/fddi.c index ac242a4bc346..797c6d961deb 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -26,7 +26,6 @@ * Maciej W. Rozycki : IPv6 support */ -#include <linux/config.h> #include <linux/module.h> #include <asm/system.h> #include <linux/types.h> diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 700129556c13..ead56037398b 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/if_tr.h> #include <linux/sysctl.h> diff --git a/net/802/tr.c b/net/802/tr.c index e9dc803f2fe0..829deb41ce81 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -165,7 +164,7 @@ static int tr_rebuild_header(struct sk_buff *skb) */ if(trllc->ethertype != htons(ETH_P_IP)) { - printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype)); + printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype)); return 0; } @@ -187,7 +186,7 @@ static int tr_rebuild_header(struct sk_buff *skb) * it via SNAP. */ -unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev) { struct trh_hdr *trh=(struct trh_hdr *)skb->data; @@ -230,15 +229,15 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) */ if (trllc->dsap == EXTENDED_SAP && - (trllc->ethertype == ntohs(ETH_P_IP) || - trllc->ethertype == ntohs(ETH_P_IPV6) || - trllc->ethertype == ntohs(ETH_P_ARP))) + (trllc->ethertype == htons(ETH_P_IP) || + trllc->ethertype == htons(ETH_P_IPV6) || + trllc->ethertype == htons(ETH_P_ARP))) { skb_pull(skb, sizeof(struct trllc)); return trllc->ethertype; } - return ntohs(ETH_P_TR_802_2); + return htons(ETH_P_TR_802_2); } /* diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3948949a609a..18fcb9fa518d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -67,10 +67,6 @@ static struct packet_type vlan_packet_type = { .func = vlan_skb_recv, /* VLAN receive method */ }; -/* Bits of netdev state that are propagated from real device to virtual */ -#define VLAN_LINK_STATE_MASK \ - ((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)|(1<<__LINK_STATE_DORMANT)) - /* End of global variables definitions. */ /* @@ -364,6 +360,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + + /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. @@ -460,6 +464,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); + if (new_dev == NULL) goto out_unlock; @@ -470,7 +475,9 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, new_dev->flags = real_dev->flags; new_dev->flags &= ~IFF_UP; - new_dev->state = real_dev->state & ~(1<<__LINK_STATE_START); + new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | + (1<<__LINK_STATE_DORMANT))) | + (1<<__LINK_STATE_PRESENT); /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. @@ -518,6 +525,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, if (register_netdevice(new_dev)) goto out_free_newdev; + lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + new_dev->iflink = real_dev->ifindex; vlan_transfer_operstate(real_dev, new_dev); linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ @@ -531,12 +540,11 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, * so it cannot "appear" on us. */ if (!grp) { /* need to add a new group */ - grp = kmalloc(sizeof(struct vlan_group), GFP_KERNEL); + grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); if (!grp) goto out_free_unregister; /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ - memset(grp, 0, sizeof(struct vlan_group)); grp->real_dev_ifindex = real_dev->ifindex; hlist_add_head_rcu(&grp->hlist, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 7b214cffc956..a8fc0de1f969 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -17,7 +17,6 @@ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/errno.h> /* return codes */ diff --git a/net/Kconfig b/net/Kconfig index c6cec5aa5486..a81aca43932f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -177,7 +177,7 @@ source "net/lapb/Kconfig" config NET_DIVERT bool "Frame Diverter (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && BROKEN ---help--- The Frame Diverter allows you to divert packets from the network, that are not aimed at the interface receiving it (in @@ -231,7 +231,7 @@ config NET_TCPPROBE TCP congestion avoidance modules. If you don't understand what was just said, you don't need it: say N. - Documentation on how to use the packet generator can be found + Documentation on how to use TCP connection probing can be found at http://linux-net.osdl.org/index.php/TcpProbe To compile this code as a module, choose M here: the @@ -249,6 +249,11 @@ source "net/ieee80211/Kconfig" config WIRELESS_EXT bool +source "net/netlabel/Kconfig" + +config FIB_RULES + bool + endif # if NET endmenu # Networking diff --git a/net/Makefile b/net/Makefile index 065796f5fb17..ad4d14f4bb29 100644 --- a/net/Makefile +++ b/net/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ +obj-$(CONFIG_NETLABEL) += netlabel/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 7076097debc2..f3777ec5bcb9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -29,7 +29,6 @@ * */ -#include <linux/config.h> #include <linux/if_arp.h> #include <net/sock.h> #include <net/datalink.h> diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index dc4048dd98c1..7ae4916cd26d 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -8,7 +8,6 @@ * Free Software Foundation, version 2. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7b1eb9a4fc96..708e2e0371af 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -51,7 +51,6 @@ * */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/if_arp.h> @@ -228,12 +227,11 @@ static void atif_drop_device(struct net_device *dev) static struct atalk_iface *atif_add_device(struct net_device *dev, struct atalk_addr *sa) { - struct atalk_iface *iface = kmalloc(sizeof(*iface), GFP_KERNEL); + struct atalk_iface *iface = kzalloc(sizeof(*iface), GFP_KERNEL); if (!iface) goto out; - memset(iface, 0, sizeof(*iface)); dev_hold(dev); iface->dev = dev; dev->atalk_ptr = iface; @@ -560,12 +558,11 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint) } if (!rt) { - rt = kmalloc(sizeof(*rt), GFP_ATOMIC); + rt = kzalloc(sizeof(*rt), GFP_ATOMIC); retval = -ENOBUFS; if (!rt) goto out_unlock; - memset(rt, 0, sizeof(*rt)); rt->next = atalk_routes; atalk_routes = rt; @@ -1005,7 +1002,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, return sum; } -static unsigned short atalk_checksum(const struct sk_buff *skb, int len) +static __be16 atalk_checksum(const struct sk_buff *skb, int len) { unsigned long sum; @@ -1013,7 +1010,7 @@ static unsigned short atalk_checksum(const struct sk_buff *skb, int len) sum = atalk_sum_skb(skb, 4, len-4, 0); /* Use 0xFFFF for 0. 0 itself means none */ - return sum ? htons((unsigned short)sum) : 0xFFFF; + return sum ? htons((unsigned short)sum) : htons(0xFFFF); } static struct proto ddp_proto = { @@ -1292,7 +1289,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) #endif static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, - struct ddpehdr *ddp, struct ddpebits *ddphv, + struct ddpehdr *ddp, __u16 len_hops, int origlen) { struct atalk_route *rt; @@ -1320,10 +1317,12 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, /* Route the packet */ rt = atrtr_find(&ta); - if (!rt || ddphv->deh_hops == DDP_MAXHOPS) + /* increment hops count */ + len_hops += 1 << 10; + if (!rt || !(len_hops & (15 << 10))) goto free_it; + /* FIXME: use skb->cb to be able to use shared skbs */ - ddphv->deh_hops++; /* * Route goes through another gateway, so set the target to the @@ -1338,11 +1337,10 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev, /* Fix up skb->len field */ skb_trim(skb, min_t(unsigned int, origlen, (rt->dev->hard_header_len + - ddp_dl->header_length + ddphv->deh_len))); + ddp_dl->header_length + (len_hops & 1023)))); - /* Mend the byte order */ /* FIXME: use skb->cb to be able to use shared skbs */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddphv)); + ddp->deh_len_hops = htons(len_hops); /* * Send the buffer onwards @@ -1397,7 +1395,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, struct atalk_iface *atif; struct sockaddr_at tosat; int origlen; - struct ddpebits ddphv; + __u16 len_hops; /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) @@ -1409,16 +1407,11 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, ddp = ddp_hdr(skb); - /* - * Fix up the length field [Ok this is horrible but otherwise - * I end up with unions of bit fields and messy bit field order - * compiler/endian dependencies..] - */ - *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); + len_hops = ntohs(ddp->deh_len_hops); /* Trim buffer in case of stray trailing data */ origlen = skb->len; - skb_trim(skb, min_t(unsigned int, skb->len, ddphv.deh_len)); + skb_trim(skb, min_t(unsigned int, skb->len, len_hops & 1023)); /* * Size check to see if ddp->deh_len was crap @@ -1433,7 +1426,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, * valid for net byte orders all over the networking code... */ if (ddp->deh_sum && - atalk_checksum(skb, ddphv.deh_len) != ddp->deh_sum) + atalk_checksum(skb, len_hops & 1023) != ddp->deh_sum) /* Not a valid AppleTalk frame - dustbin time */ goto freeit; @@ -1447,7 +1440,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, /* Not ours, so we route the packet via the correct * AppleTalk iface */ - atalk_route_packet(skb, dev, ddp, &ddphv, origlen); + atalk_route_packet(skb, dev, ddp, len_hops, origlen); goto out; } @@ -1492,7 +1485,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, /* Find our address */ struct atalk_addr *ap = atalk_find_dev_addr(dev); - if (!ap || skb->len < sizeof(struct ddpshdr)) + if (!ap || skb->len < sizeof(__be16) || skb->len > 1023) goto freeit; /* Don't mangle buffer if shared */ @@ -1522,11 +1515,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, /* * Not sure about this bit... */ - ddp->deh_len = skb->len; - ddp->deh_hops = DDP_MAXHOPS; /* Non routable, so force a drop - if we slip up later */ - /* Mend the byte order */ - *((__u16 *)ddp) = htons(*((__u16 *)ddp)); + /* Non routable, so force a drop if we slip up later */ + ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10)); } skb->h.raw = skb->data; @@ -1625,16 +1615,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr)); - ddp->deh_pad = 0; - ddp->deh_hops = 0; - ddp->deh_len = len + sizeof(*ddp); - /* - * Fix up the length field [Ok this is horrible but otherwise - * I end up with unions of bit fields and messy bit field order - * compiler/endian dependencies.. - */ - *((__u16 *)ddp) = ntohs(*((__u16 *)ddp)); - + ddp->deh_len_hops = htons(len + sizeof(*ddp)); ddp->deh_dnet = usat->sat_addr.s_net; ddp->deh_snet = at->src_net; ddp->deh_dnode = usat->sat_addr.s_node; @@ -1715,8 +1696,8 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; + int offset = 0; int err = 0; - struct ddpebits ddphv; struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1724,25 +1705,18 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr /* FIXME: use skb->cb to be able to use shared skbs */ ddp = ddp_hdr(skb); - *((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp)); + copied = ntohs(ddp->deh_len_hops) & 1023; - if (sk->sk_type == SOCK_RAW) { - copied = ddphv.deh_len; - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } + if (sk->sk_type != SOCK_RAW) { + offset = sizeof(*ddp); + copied -= offset; + } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - } else { - copied = ddphv.deh_len - sizeof(*ddp); - if (copied > size) { - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - err = skb_copy_datagram_iovec(skb, sizeof(*ddp), - msg->msg_iov, copied); + if (copied > size) { + copied = size; + msg->msg_flags |= MSG_TRUNC; } + err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); if (!err) { if (sat) { diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index af7f0604395d..40b0af7437a2 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -6,7 +6,6 @@ * Dynamic registration, added aarp entries. (5/30/97 Chris Horn) */ -#include <linux/config.h> #include <linux/sysctl.h> #include <net/sock.h> #include <linux/atalk.h> diff --git a/net/atm/Makefile b/net/atm/Makefile index d5818751f6ba..89656d6c0b90 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -2,7 +2,7 @@ # Makefile for the ATM Protocol Families. # -atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o +atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o atm_sysfs.o mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o obj-$(CONFIG_ATM) += atm.o diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c new file mode 100644 index 000000000000..c0a4ae28fcfa --- /dev/null +++ b/net/atm/atm_sysfs.c @@ -0,0 +1,175 @@ +/* ATM driver model support. */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/atmdev.h> +#include "common.h" +#include "resources.h" + +#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev) + +static ssize_t show_type(struct class_device *cdev, char *buf) +{ + struct atm_dev *adev = to_atm_dev(cdev); + return sprintf(buf, "%s\n", adev->type); +} + +static ssize_t show_address(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + int i; + + for (i = 0; i < (ESI_LEN - 1); i++) + pos += sprintf(pos, "%02x:", adev->esi[i]); + pos += sprintf(pos, "%02x\n", adev->esi[i]); + + return pos - buf; +} + +static ssize_t show_atmaddress(struct class_device *cdev, char *buf) +{ + unsigned long flags; + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + struct atm_dev_addr *aaddr; + int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin; + int i, j; + + spin_lock_irqsave(&adev->lock, flags); + list_for_each_entry(aaddr, &adev->local, entry) { + for(i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { + if (j == *fmt) { + pos += sprintf(pos, "."); + ++fmt; + j = 0; + } + pos += sprintf(pos, "%02x", aaddr->addr.sas_addr.prv[i]); + } + pos += sprintf(pos, "\n"); + } + spin_unlock_irqrestore(&adev->lock, flags); + + return pos - buf; +} + +static ssize_t show_carrier(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + + pos += sprintf(pos, "%d\n", + adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); + + return pos - buf; +} + +static ssize_t show_link_rate(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + int link_rate; + + /* show the link rate, not the data rate */ + switch (adev->link_rate) { + case ATM_OC3_PCR: + link_rate = 155520000; + break; + case ATM_OC12_PCR: + link_rate = 622080000; + break; + case ATM_25_PCR: + link_rate = 25600000; + break; + default: + link_rate = adev->link_rate * 8 * 53; + } + pos += sprintf(pos, "%d\n", link_rate); + + return pos - buf; +} + +static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL); +static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); +static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL); + +static struct class_device_attribute *atm_attrs[] = { + &class_device_attr_atmaddress, + &class_device_attr_address, + &class_device_attr_carrier, + &class_device_attr_type, + &class_device_attr_link_rate, + NULL +}; + +static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +{ + struct atm_dev *adev; + int i = 0, len = 0; + + if (!cdev) + return -ENODEV; + + adev = to_atm_dev(cdev); + if (!adev) + return -ENODEV; + + if (add_uevent_var(envp, num_envp, &i, buf, size, &len, + "NAME=%s%d", adev->type, adev->number)) + return -ENOMEM; + + envp[i] = NULL; + return 0; +} + +static void atm_release(struct class_device *cdev) +{ + struct atm_dev *adev = to_atm_dev(cdev); + + kfree(adev); +} + +static struct class atm_class = { + .name = "atm", + .release = atm_release, + .uevent = atm_uevent, +}; + +int atm_register_sysfs(struct atm_dev *adev) +{ + struct class_device *cdev = &adev->class_dev; + int i, err; + + cdev->class = &atm_class; + class_set_devdata(cdev, adev); + + snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + err = class_device_register(cdev); + if (err < 0) + return err; + + for (i = 0; atm_attrs[i]; i++) + class_device_create_file(cdev, atm_attrs[i]); + + return 0; +} + +void atm_unregister_sysfs(struct atm_dev *adev) +{ + struct class_device *cdev = &adev->class_dev; + + class_device_del(cdev); +} + +int __init atm_sysfs_init(void) +{ + return class_register(&atm_class); +} + +void __exit atm_sysfs_exit(void) +{ + class_unregister(&atm_class); +} diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 680ccb12aae8..d00cca97eb33 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -5,7 +5,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary */ #include <linux/module.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> @@ -509,10 +508,9 @@ Note: we do not have explicit unassign, but look at _push() if (copy_from_user(&be, arg, sizeof be)) return -EFAULT; - brvcc = kmalloc(sizeof(struct br2684_vcc), GFP_KERNEL); + brvcc = kzalloc(sizeof(struct br2684_vcc), GFP_KERNEL); if (!brvcc) return -ENOMEM; - memset(brvcc, 0, sizeof(struct br2684_vcc)); write_lock_irq(&devs_lock); net_dev = br2684_find_dev(&be.ifspec); if (net_dev == NULL) { diff --git a/net/atm/clip.c b/net/atm/clip.c index f92f9c94d2c7..7af2c411da82 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -2,7 +2,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kernel.h> /* for UINT_MAX */ @@ -24,6 +23,7 @@ #include <linux/if.h> /* for IFF_UP */ #include <linux/inetdevice.h> #include <linux/bitops.h> +#include <linux/poison.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> @@ -267,7 +267,7 @@ static void clip_neigh_destroy(struct neighbour *neigh) DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh); if (NEIGH2ENTRY(neigh)->vccs) printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n"); - NEIGH2ENTRY(neigh)->vccs = (void *) 0xdeadbeef; + NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD; } static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) @@ -500,9 +500,11 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) } else { unsigned int len = skb->len; + skb_get(skb); clip_push(vcc, skb); PRIV(skb->dev)->stats.rx_packets--; PRIV(skb->dev)->stats.rx_bytes -= len; + kfree_skb(skb); } return 0; } @@ -929,12 +931,11 @@ static int arp_seq_open(struct inode *inode, struct file *file) struct seq_file *seq; int rc = -EAGAIN; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { rc = -ENOMEM; goto out_kfree; } - memset(state, 0, sizeof(*state)); state->ns.neigh_sub_iter = clip_seq_sub_iter; rc = seq_open(file, &arp_seq_ops); @@ -962,7 +963,6 @@ static struct file_operations arp_seq_fops = { static int __init atm_clip_init(void) { - struct proc_dir_entry *p; neigh_table_init_no_netlink(&clip_tbl); clip_tbl_hook = &clip_tbl; @@ -972,9 +972,15 @@ static int __init atm_clip_init(void) setup_timer(&idle_timer, idle_timer_check, 0); - p = create_proc_entry("arp", S_IRUGO, atm_proc_root); - if (p) - p->proc_fops = &arp_seq_fops; +#ifdef CONFIG_PROC_FS + { + struct proc_dir_entry *p; + + p = create_proc_entry("arp", S_IRUGO, atm_proc_root); + if (p) + p->proc_fops = &arp_seq_fops; + } +#endif return 0; } diff --git a/net/atm/common.c b/net/atm/common.c index ae002220fa99..fbabff494468 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -3,7 +3,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/net.h> /* struct socket, struct proto_ops */ @@ -791,8 +790,14 @@ static int __init atm_init(void) printk(KERN_ERR "atm_proc_init() failed with %d\n",error); goto out_atmsvc_exit; } + if ((error = atm_sysfs_init()) < 0) { + printk(KERN_ERR "atm_sysfs_init() failed with %d\n",error); + goto out_atmproc_exit; + } out: return error; +out_atmproc_exit: + atm_proc_exit(); out_atmsvc_exit: atmsvc_exit(); out_atmpvc_exit: @@ -805,6 +810,7 @@ out_unregister_vcc_proto: static void __exit atm_exit(void) { atm_proc_exit(); + atm_sysfs_exit(); atmsvc_exit(); atmpvc_exit(); proto_unregister(&vcc_proto); diff --git a/net/atm/common.h b/net/atm/common.h index 4887c317cefe..a422da7788fb 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -28,6 +28,8 @@ int atmpvc_init(void); void atmpvc_exit(void); int atmsvc_init(void); void atmsvc_exit(void); +int atm_sysfs_init(void); +void atm_sysfs_exit(void); #ifdef CONFIG_PROC_FS int atm_proc_init(void); diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 851cfa6312af..8c2022c3e81d 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -4,7 +4,6 @@ /* 2003 John Levon <levon@movementarian.org> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/net.h> /* struct socket, struct proto_ops */ diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index 4b1faca5013f..1d3de42fada0 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -25,22 +25,27 @@ /* * skb_migrate appends the list at "from" to "to", emptying "from" in the * process. skb_migrate is atomic with respect to all other skb operations on - * "from" and "to". Note that it locks both lists at the same time, so beware - * of potential deadlocks. + * "from" and "to". Note that it locks both lists at the same time, so to deal + * with the lock ordering, the locks are taken in address order. * * This function should live in skbuff.c or skbuff.h. */ -void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) +void skb_migrate(struct sk_buff_head *from, struct sk_buff_head *to) { unsigned long flags; struct sk_buff *skb_from = (struct sk_buff *) from; struct sk_buff *skb_to = (struct sk_buff *) to; struct sk_buff *prev; - spin_lock_irqsave(&from->lock,flags); - spin_lock(&to->lock); + if ((unsigned long) from < (unsigned long) to) { + spin_lock_irqsave(&from->lock, flags); + spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irqsave(&to->lock, flags); + spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING); + } prev = from->prev; from->next->prev = to->prev; prev->next = skb_to; @@ -51,7 +56,7 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) from->prev = skb_from; from->next = skb_from; from->qlen = 0; - spin_unlock_irqrestore(&from->lock,flags); + spin_unlock_irqrestore(&from->lock, flags); } diff --git a/net/atm/lec.c b/net/atm/lec.c index c4fc722fef9a..66c57c1091a8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1,10 +1,9 @@ /* * lec.c: Lan Emulation driver - * Marko Kiiskila mkiiskila@yahoo.com * + * Marko Kiiskila <mkiiskila@yahoo.com> */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/capability.h> @@ -39,7 +38,7 @@ #include <linux/if_bridge.h> #include "../bridge/br_private.h" -static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00}; +static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; #endif /* Modular too */ @@ -56,38 +55,41 @@ static unsigned char bridge_ula_lec[] = {0x01, 0x80, 0xc2, 0x00, 0x00}; #define DPRINTK(format,args...) #endif -#define DUMP_PACKETS 0 /* 0 = None, - * 1 = 30 first bytes - * 2 = Whole packet - */ +#define DUMP_PACKETS 0 /* + * 0 = None, + * 1 = 30 first bytes + * 2 = Whole packet + */ -#define LEC_UNRES_QUE_LEN 8 /* number of tx packets to queue for a - single destination while waiting for SVC */ +#define LEC_UNRES_QUE_LEN 8 /* + * number of tx packets to queue for a + * single destination while waiting for SVC + */ static int lec_open(struct net_device *dev); static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev); static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); -static struct lec_arp_table* lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr); +static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, + unsigned char *mac_addr); static int lec_arp_remove(struct lec_priv *priv, - struct lec_arp_table *to_remove); + struct lec_arp_table *to_remove); /* LANE2 functions */ -static void lane2_associate_ind (struct net_device *dev, u8 *mac_address, - u8 *tlvs, u32 sizeoftlvs); +static void lane2_associate_ind(struct net_device *dev, u8 *mac_address, + u8 *tlvs, u32 sizeoftlvs); static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); -static int lane2_associate_req (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); + u8 **tlvs, u32 *sizeoftlvs); +static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs); -static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, +static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, unsigned long permanent); static void lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb); static void lec_arp_destroy(struct lec_priv *priv); static void lec_arp_init(struct lec_priv *priv); -static struct atm_vcc* lec_arp_resolve(struct lec_priv *priv, +static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry); @@ -101,16 +103,30 @@ static void lec_set_flush_tran_id(struct lec_priv *priv, unsigned long tran_id); static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, - void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb)); + void (*old_push) (struct atm_vcc *vcc, + struct sk_buff *skb)); static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc); +/* must be done under lec_arp_lock */ +static inline void lec_arp_hold(struct lec_arp_table *entry) +{ + atomic_inc(&entry->usage); +} + +static inline void lec_arp_put(struct lec_arp_table *entry) +{ + if (atomic_dec_and_test(&entry->usage)) + kfree(entry); +} + + static struct lane2_ops lane2_ops = { - lane2_resolve, /* resolve, spec 3.1.3 */ - lane2_associate_req, /* associate_req, spec 3.1.4 */ - NULL /* associate indicator, spec 3.1.5 */ + lane2_resolve, /* resolve, spec 3.1.3 */ + lane2_associate_req, /* associate_req, spec 3.1.4 */ + NULL /* associate indicator, spec 3.1.5 */ }; -static unsigned char bus_mac[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; +static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; @@ -118,36 +134,39 @@ static struct net_device *dev_lec[MAX_LEC_ITF]; #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) { - struct ethhdr *eth; - char *buff; - struct lec_priv *priv; - - /* Check if this is a BPDU. If so, ask zeppelin to send - * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit - * as the Config BPDU has */ - eth = (struct ethhdr *)skb->data; - buff = skb->data + skb->dev->hard_header_len; - if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { + struct ethhdr *eth; + char *buff; + struct lec_priv *priv; + + /* + * Check if this is a BPDU. If so, ask zeppelin to send + * LE_TOPOLOGY_REQUEST with the same value of Topology Change bit + * as the Config BPDU has + */ + eth = (struct ethhdr *)skb->data; + buff = skb->data + skb->dev->hard_header_len; + if (*buff++ == 0x42 && *buff++ == 0x42 && *buff++ == 0x03) { struct sock *sk; - struct sk_buff *skb2; - struct atmlec_msg *mesg; - - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) return; - skb2->len = sizeof(struct atmlec_msg); - mesg = (struct atmlec_msg *)skb2->data; - mesg->type = l_topology_change; - buff += 4; - mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ - - priv = (struct lec_priv *)dev->priv; - atm_force_charge(priv->lecd, skb2->truesize); + struct sk_buff *skb2; + struct atmlec_msg *mesg; + + skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); + if (skb2 == NULL) + return; + skb2->len = sizeof(struct atmlec_msg); + mesg = (struct atmlec_msg *)skb2->data; + mesg->type = l_topology_change; + buff += 4; + mesg->content.normal.flag = *buff & 0x01; /* 0x01 is topology change */ + + priv = (struct lec_priv *)dev->priv; + atm_force_charge(priv->lecd, skb2->truesize); sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); - } + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk, skb2->len); + } - return; + return; } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ @@ -163,36 +182,35 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) #ifdef CONFIG_TR static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) { - struct trh_hdr *trh; - int riflen, num_rdsc; - - trh = (struct trh_hdr *)packet; - if (trh->daddr[0] & (uint8_t)0x80) - return bus_mac; /* multicast */ - - if (trh->saddr[0] & TR_RII) { - riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; - if ((ntohs(trh->rcf) >> 13) != 0) - return bus_mac; /* ARE or STE */ - } - else - return trh->daddr; /* not source routed */ - - if (riflen < 6) - return trh->daddr; /* last hop, source routed */ - - /* riflen is 6 or more, packet has more than one route descriptor */ - num_rdsc = (riflen/2) - 1; - memset(rdesc, 0, ETH_ALEN); - /* offset 4 comes from LAN destination field in LE control frames */ - if (trh->rcf & htons((uint16_t)TR_RCF_DIR_BIT)) - memcpy(&rdesc[4], &trh->rseg[num_rdsc-2], sizeof(uint16_t)); - else { - memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t)); - rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0)); - } - - return NULL; + struct trh_hdr *trh; + int riflen, num_rdsc; + + trh = (struct trh_hdr *)packet; + if (trh->daddr[0] & (uint8_t) 0x80) + return bus_mac; /* multicast */ + + if (trh->saddr[0] & TR_RII) { + riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; + if ((ntohs(trh->rcf) >> 13) != 0) + return bus_mac; /* ARE or STE */ + } else + return trh->daddr; /* not source routed */ + + if (riflen < 6) + return trh->daddr; /* last hop, source routed */ + + /* riflen is 6 or more, packet has more than one route descriptor */ + num_rdsc = (riflen / 2) - 1; + memset(rdesc, 0, ETH_ALEN); + /* offset 4 comes from LAN destination field in LE control frames */ + if (trh->rcf & htons((uint16_t) TR_RCF_DIR_BIT)) + memcpy(&rdesc[4], &trh->rseg[num_rdsc - 2], sizeof(uint16_t)); + else { + memcpy(&rdesc[4], &trh->rseg[1], sizeof(uint16_t)); + rdesc[5] = ((ntohs(trh->rseg[0]) & 0x000f) | (rdesc[5] & 0xf0)); + } + + return NULL; } #endif /* CONFIG_TR */ @@ -205,15 +223,14 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) * there is non-reboot way to recover if something goes wrong. */ -static int -lec_open(struct net_device *dev) +static int lec_open(struct net_device *dev) { - struct lec_priv *priv = (struct lec_priv *)dev->priv; - + struct lec_priv *priv = (struct lec_priv *)dev->priv; + netif_start_queue(dev); - memset(&priv->stats,0,sizeof(struct net_device_stats)); - - return 0; + memset(&priv->stats, 0, sizeof(struct net_device_stats)); + + return 0; } static __inline__ void @@ -232,160 +249,166 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv) priv->stats.tx_bytes += skb->len; } -static void -lec_tx_timeout(struct net_device *dev) +static void lec_tx_timeout(struct net_device *dev) { printk(KERN_INFO "%s: tx timeout\n", dev->name); dev->trans_start = jiffies; netif_wake_queue(dev); } -static int -lec_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct sk_buff *skb2; - struct lec_priv *priv = (struct lec_priv *)dev->priv; - struct lecdatahdr_8023 *lec_h; - struct atm_vcc *vcc; + struct sk_buff *skb2; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lecdatahdr_8023 *lec_h; + struct atm_vcc *vcc; struct lec_arp_table *entry; - unsigned char *dst; + unsigned char *dst; int min_frame_size; #ifdef CONFIG_TR - unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */ + unsigned char rdesc[ETH_ALEN]; /* Token Ring route descriptor */ #endif - int is_rdesc; + int is_rdesc; #if DUMP_PACKETS > 0 - char buf[300]; - int i=0; + char buf[300]; + int i = 0; #endif /* DUMP_PACKETS >0 */ - - DPRINTK("lec_start_xmit called\n"); - if (!priv->lecd) { - printk("%s:No lecd attached\n",dev->name); - priv->stats.tx_errors++; - netif_stop_queue(dev); - return -EUNATCH; - } - - DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", - (long)skb->head, (long)skb->data, (long)skb->tail, - (long)skb->end); + + DPRINTK("lec_start_xmit called\n"); + if (!priv->lecd) { + printk("%s:No lecd attached\n", dev->name); + priv->stats.tx_errors++; + netif_stop_queue(dev); + return -EUNATCH; + } + + DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", + (long)skb->head, (long)skb->data, (long)skb->tail, + (long)skb->end); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) - lec_handle_bridge(skb, dev); + if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) + lec_handle_bridge(skb, dev); #endif - /* Make sure we have room for lec_id */ - if (skb_headroom(skb) < 2) { + /* Make sure we have room for lec_id */ + if (skb_headroom(skb) < 2) { - DPRINTK("lec_start_xmit: reallocating skb\n"); - skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); - kfree_skb(skb); - if (skb2 == NULL) return 0; - skb = skb2; - } - skb_push(skb, 2); + DPRINTK("lec_start_xmit: reallocating skb\n"); + skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); + kfree_skb(skb); + if (skb2 == NULL) + return 0; + skb = skb2; + } + skb_push(skb, 2); - /* Put le header to place, works for TokenRing too */ - lec_h = (struct lecdatahdr_8023*)skb->data; - lec_h->le_header = htons(priv->lecid); + /* Put le header to place, works for TokenRing too */ + lec_h = (struct lecdatahdr_8023 *)skb->data; + lec_h->le_header = htons(priv->lecid); #ifdef CONFIG_TR - /* Ugly. Use this to realign Token Ring packets for - * e.g. PCA-200E driver. */ - if (priv->is_trdev) { - skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); - kfree_skb(skb); - if (skb2 == NULL) return 0; - skb = skb2; - } + /* + * Ugly. Use this to realign Token Ring packets for + * e.g. PCA-200E driver. + */ + if (priv->is_trdev) { + skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); + kfree_skb(skb); + if (skb2 == NULL) + return 0; + skb = skb2; + } #endif #if DUMP_PACKETS > 0 - printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); + printk("%s: send datalen:%ld lecid:%4.4x\n", dev->name, + skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for(i=0;i<skb->len && i <99;i++) { - sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 99; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #elif DUMP_PACKETS >= 1 - for(i=0;i<skb->len && i < 30;i++) { - sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 30; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #endif /* DUMP_PACKETS >= 1 */ - if (i==skb->len) - printk("%s\n",buf); - else - printk("%s...\n",buf); + if (i == skb->len) + printk("%s\n", buf); + else + printk("%s...\n", buf); #endif /* DUMP_PACKETS > 0 */ - /* Minimum ethernet-frame size */ + /* Minimum ethernet-frame size */ #ifdef CONFIG_TR - if (priv->is_trdev) - min_frame_size = LEC_MINIMUM_8025_SIZE; + if (priv->is_trdev) + min_frame_size = LEC_MINIMUM_8025_SIZE; else #endif - min_frame_size = LEC_MINIMUM_8023_SIZE; - if (skb->len < min_frame_size) { - if ((skb->len + skb_tailroom(skb)) < min_frame_size) { - skb2 = skb_copy_expand(skb, 0, - min_frame_size - skb->truesize, GFP_ATOMIC); - dev_kfree_skb(skb); - if (skb2 == NULL) { - priv->stats.tx_dropped++; - return 0; - } - skb = skb2; - } + min_frame_size = LEC_MINIMUM_8023_SIZE; + if (skb->len < min_frame_size) { + if ((skb->len + skb_tailroom(skb)) < min_frame_size) { + skb2 = skb_copy_expand(skb, 0, + min_frame_size - skb->truesize, + GFP_ATOMIC); + dev_kfree_skb(skb); + if (skb2 == NULL) { + priv->stats.tx_dropped++; + return 0; + } + skb = skb2; + } skb_put(skb, min_frame_size - skb->len); - } - - /* Send to right vcc */ - is_rdesc = 0; - dst = lec_h->h_dest; + } + + /* Send to right vcc */ + is_rdesc = 0; + dst = lec_h->h_dest; #ifdef CONFIG_TR - if (priv->is_trdev) { - dst = get_tr_dst(skb->data+2, rdesc); - if (dst == NULL) { - dst = rdesc; - is_rdesc = 1; - } - } + if (priv->is_trdev) { + dst = get_tr_dst(skb->data + 2, rdesc); + if (dst == NULL) { + dst = rdesc; + is_rdesc = 1; + } + } #endif - entry = NULL; - vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, - vcc, vcc?vcc->flags:0, entry); - if (!vcc || !test_bit(ATM_VF_READY,&vcc->flags)) { - if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - DPRINTK("%s:lec_start_xmit: queuing packet, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); - skb_queue_tail(&entry->tx_wait, skb); - } else { - DPRINTK("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); - priv->stats.tx_dropped++; - dev_kfree_skb(skb); - } - return 0; - } - -#if DUMP_PACKETS > 0 - printk("%s:sending to vpi:%d vci:%d\n", dev->name, - vcc->vpi, vcc->vci); + entry = NULL; + vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); + DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, + vcc, vcc ? vcc->flags : 0, entry); + if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { + if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { + DPRINTK("%s:lec_start_xmit: queuing packet, ", + dev->name); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); + skb_queue_tail(&entry->tx_wait, skb); + } else { + DPRINTK + ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", + dev->name); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], + lec_h->h_dest[2], lec_h->h_dest[3], + lec_h->h_dest[4], lec_h->h_dest[5]); + priv->stats.tx_dropped++; + dev_kfree_skb(skb); + } + goto out; + } +#if DUMP_PACKETS > 0 + printk("%s:sending to vpi:%d vci:%d\n", dev->name, vcc->vpi, vcc->vci); #endif /* DUMP_PACKETS > 0 */ - - while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - DPRINTK("lec.c: emptying tx queue, "); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], - lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); + + while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { + DPRINTK("lec.c: emptying tx queue, "); + DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], + lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); lec_send(vcc, skb2, priv); - } + } lec_send(vcc, skb, priv); @@ -405,210 +428,219 @@ lec_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_wake_queue(dev); } +out: + if (entry) + lec_arp_put(entry); dev->trans_start = jiffies; - return 0; + return 0; } /* The inverse routine to net_open(). */ -static int -lec_close(struct net_device *dev) +static int lec_close(struct net_device *dev) { - netif_stop_queue(dev); - return 0; + netif_stop_queue(dev); + return 0; } /* * Get the current statistics. * This may be called with the card open or closed. */ -static struct net_device_stats * -lec_get_stats(struct net_device *dev) +static struct net_device_stats *lec_get_stats(struct net_device *dev) { - return &((struct lec_priv *)dev->priv)->stats; + return &((struct lec_priv *)dev->priv)->stats; } -static int -lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) +static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct net_device *dev = (struct net_device*)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv*)dev->priv; - struct atmlec_msg *mesg; - struct lec_arp_table *entry; - int i; - char *tmp; /* FIXME */ + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct atmlec_msg *mesg; + struct lec_arp_table *entry; + int i; + char *tmp; /* FIXME */ atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - mesg = (struct atmlec_msg *)skb->data; - tmp = skb->data; - tmp += sizeof(struct atmlec_msg); - DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); - switch(mesg->type) { - case l_set_mac_addr: - for (i=0;i<6;i++) { - dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; - } - break; - case l_del_mac_addr: - for(i=0;i<6;i++) { - dev->dev_addr[i] = 0; - } - break; - case l_addr_delete: - lec_addr_delete(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_topology_change: - priv->topology_change = mesg->content.normal.flag; - break; - case l_flush_complete: - lec_flush_complete(priv, mesg->content.normal.flag); - break; - case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ + mesg = (struct atmlec_msg *)skb->data; + tmp = skb->data; + tmp += sizeof(struct atmlec_msg); + DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); + switch (mesg->type) { + case l_set_mac_addr: + for (i = 0; i < 6; i++) { + dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; + } + break; + case l_del_mac_addr: + for (i = 0; i < 6; i++) { + dev->dev_addr[i] = 0; + } + break; + case l_addr_delete: + lec_addr_delete(priv, mesg->content.normal.atm_addr, + mesg->content.normal.flag); + break; + case l_topology_change: + priv->topology_change = mesg->content.normal.flag; + break; + case l_flush_complete: + lec_flush_complete(priv, mesg->content.normal.flag); + break; + case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mesg->content.normal.mac_addr); - lec_arp_remove(priv, entry); + entry = lec_arp_find(priv, mesg->content.normal.mac_addr); + lec_arp_remove(priv, entry); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if (mesg->content.normal.no_source_le_narp) - break; - /* FALL THROUGH */ - case l_arp_update: - lec_arp_update(priv, mesg->content.normal.mac_addr, - mesg->content.normal.atm_addr, - mesg->content.normal.flag, - mesg->content.normal.targetless_le_arp); - DPRINTK("lec: in l_arp_update\n"); - if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs); - lane2_associate_ind(dev, - mesg->content.normal.mac_addr, - tmp, mesg->sizeoftlvs); - } - break; - case l_config: - priv->maximum_unknown_frame_count = - mesg->content.config.maximum_unknown_frame_count; - priv->max_unknown_frame_time = - (mesg->content.config.max_unknown_frame_time*HZ); - priv->max_retry_count = - mesg->content.config.max_retry_count; - priv->aging_time = (mesg->content.config.aging_time*HZ); - priv->forward_delay_time = - (mesg->content.config.forward_delay_time*HZ); - priv->arp_response_time = - (mesg->content.config.arp_response_time*HZ); - priv->flush_timeout = (mesg->content.config.flush_timeout*HZ); - priv->path_switching_delay = - (mesg->content.config.path_switching_delay*HZ); - priv->lane_version = mesg->content.config.lane_version; /* LANE2 */ + if (mesg->content.normal.no_source_le_narp) + break; + /* FALL THROUGH */ + case l_arp_update: + lec_arp_update(priv, mesg->content.normal.mac_addr, + mesg->content.normal.atm_addr, + mesg->content.normal.flag, + mesg->content.normal.targetless_le_arp); + DPRINTK("lec: in l_arp_update\n"); + if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ + DPRINTK("lec: LANE2 3.1.5, got tlvs, size %d\n", + mesg->sizeoftlvs); + lane2_associate_ind(dev, mesg->content.normal.mac_addr, + tmp, mesg->sizeoftlvs); + } + break; + case l_config: + priv->maximum_unknown_frame_count = + mesg->content.config.maximum_unknown_frame_count; + priv->max_unknown_frame_time = + (mesg->content.config.max_unknown_frame_time * HZ); + priv->max_retry_count = mesg->content.config.max_retry_count; + priv->aging_time = (mesg->content.config.aging_time * HZ); + priv->forward_delay_time = + (mesg->content.config.forward_delay_time * HZ); + priv->arp_response_time = + (mesg->content.config.arp_response_time * HZ); + priv->flush_timeout = (mesg->content.config.flush_timeout * HZ); + priv->path_switching_delay = + (mesg->content.config.path_switching_delay * HZ); + priv->lane_version = mesg->content.config.lane_version; /* LANE2 */ priv->lane2_ops = NULL; if (priv->lane_version > 1) priv->lane2_ops = &lane2_ops; if (dev->change_mtu(dev, mesg->content.config.mtu)) printk("%s: change_mtu to %d failed\n", dev->name, - mesg->content.config.mtu); + mesg->content.config.mtu); priv->is_proxy = mesg->content.config.is_proxy; - break; - case l_flush_tran_id: - lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, - mesg->content.normal.flag); - break; - case l_set_lecid: - priv->lecid=(unsigned short)(0xffff&mesg->content.normal.flag); - break; - case l_should_bridge: { + break; + case l_flush_tran_id: + lec_set_flush_tran_id(priv, mesg->content.normal.atm_addr, + mesg->content.normal.flag); + break; + case l_set_lecid: + priv->lecid = + (unsigned short)(0xffff & mesg->content.normal.flag); + break; + case l_should_bridge: #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - struct net_bridge_fdb_entry *f; - - DPRINTK("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", - dev->name, - mesg->content.proxy.mac_addr[0], mesg->content.proxy.mac_addr[1], - mesg->content.proxy.mac_addr[2], mesg->content.proxy.mac_addr[3], - mesg->content.proxy.mac_addr[4], mesg->content.proxy.mac_addr[5]); - - if (br_fdb_get_hook == NULL || dev->br_port == NULL) - break; - - f = br_fdb_get_hook(dev->br_port->br, mesg->content.proxy.mac_addr); - if (f != NULL && - f->dst->dev != dev && - f->dst->state == BR_STATE_FORWARDING) { - /* hit from bridge table, send LE_ARP_RESPONSE */ - struct sk_buff *skb2; - struct sock *sk; - - DPRINTK("%s: entry found, responding to zeppelin\n", dev->name); - skb2 = alloc_skb(sizeof(struct atmlec_msg), GFP_ATOMIC); - if (skb2 == NULL) { - br_fdb_put_hook(f); - break; - } - skb2->len = sizeof(struct atmlec_msg); - memcpy(skb2->data, mesg, sizeof(struct atmlec_msg)); - atm_force_charge(priv->lecd, skb2->truesize); - sk = sk_atm(priv->lecd); - skb_queue_tail(&sk->sk_receive_queue, skb2); - sk->sk_data_ready(sk, skb2->len); - } - if (f != NULL) br_fdb_put_hook(f); + { + struct net_bridge_fdb_entry *f; + + DPRINTK + ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + dev->name, mesg->content.proxy.mac_addr[0], + mesg->content.proxy.mac_addr[1], + mesg->content.proxy.mac_addr[2], + mesg->content.proxy.mac_addr[3], + mesg->content.proxy.mac_addr[4], + mesg->content.proxy.mac_addr[5]); + + if (br_fdb_get_hook == NULL || dev->br_port == NULL) + break; + + f = br_fdb_get_hook(dev->br_port->br, + mesg->content.proxy.mac_addr); + if (f != NULL && f->dst->dev != dev + && f->dst->state == BR_STATE_FORWARDING) { + /* hit from bridge table, send LE_ARP_RESPONSE */ + struct sk_buff *skb2; + struct sock *sk; + + DPRINTK + ("%s: entry found, responding to zeppelin\n", + dev->name); + skb2 = + alloc_skb(sizeof(struct atmlec_msg), + GFP_ATOMIC); + if (skb2 == NULL) { + br_fdb_put_hook(f); + break; + } + skb2->len = sizeof(struct atmlec_msg); + memcpy(skb2->data, mesg, + sizeof(struct atmlec_msg)); + atm_force_charge(priv->lecd, skb2->truesize); + sk = sk_atm(priv->lecd); + skb_queue_tail(&sk->sk_receive_queue, skb2); + sk->sk_data_ready(sk, skb2->len); + } + if (f != NULL) + br_fdb_put_hook(f); + } #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ - } - break; - default: - printk("%s: Unknown message type %d\n", dev->name, mesg->type); - dev_kfree_skb(skb); - return -EINVAL; - } - dev_kfree_skb(skb); - return 0; + break; + default: + printk("%s: Unknown message type %d\n", dev->name, mesg->type); + dev_kfree_skb(skb); + return -EINVAL; + } + dev_kfree_skb(skb); + return 0; } -static void -lec_atm_close(struct atm_vcc *vcc) +static void lec_atm_close(struct atm_vcc *vcc) { - struct sk_buff *skb; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct sk_buff *skb; + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; - priv->lecd = NULL; - /* Do something needful? */ + priv->lecd = NULL; + /* Do something needful? */ - netif_stop_queue(dev); - lec_arp_destroy(priv); + netif_stop_queue(dev); + lec_arp_destroy(priv); - if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) + if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) printk("%s lec_atm_close: closing with messages pending\n", - dev->name); - while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { - atm_return(vcc, skb->truesize); + dev->name); + while ((skb = skb_dequeue(&sk_atm(vcc)->sk_receive_queue)) != NULL) { + atm_return(vcc, skb->truesize); dev_kfree_skb(skb); - } - + } + printk("%s: Shut down!\n", dev->name); - module_put(THIS_MODULE); + module_put(THIS_MODULE); } static struct atmdev_ops lecdev_ops = { - .close = lec_atm_close, - .send = lec_atm_send + .close = lec_atm_close, + .send = lec_atm_send }; static struct atm_dev lecatm_dev = { - .ops = &lecdev_ops, - .type = "lec", - .number = 999, /* dummy device number */ - .lock = SPIN_LOCK_UNLOCKED + .ops = &lecdev_ops, + .type = "lec", + .number = 999, /* dummy device number */ + .lock = SPIN_LOCK_UNLOCKED }; /* * LANE2: new argument struct sk_buff *data contains * the LE_ARP based TLVs introduced in the LANE2 spec */ -static int -send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, - unsigned char *mac_addr, unsigned char *atm_addr, - struct sk_buff *data) +static int +send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, + unsigned char *mac_addr, unsigned char *atm_addr, + struct sk_buff *data) { struct sock *sk; struct sk_buff *skb; @@ -622,187 +654,193 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, return -1; skb->len = sizeof(struct atmlec_msg); mesg = (struct atmlec_msg *)skb->data; - memset(mesg, 0, sizeof(struct atmlec_msg)); + memset(mesg, 0, sizeof(struct atmlec_msg)); mesg->type = type; - if (data != NULL) - mesg->sizeoftlvs = data->len; + if (data != NULL) + mesg->sizeoftlvs = data->len; if (mac_addr) memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN); - else - mesg->content.normal.targetless_le_arp = 1; + else + mesg->content.normal.targetless_le_arp = 1; if (atm_addr) memcpy(&mesg->content.normal.atm_addr, atm_addr, ATM_ESA_LEN); - atm_force_charge(priv->lecd, skb->truesize); + atm_force_charge(priv->lecd, skb->truesize); sk = sk_atm(priv->lecd); skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, skb->len); - if (data != NULL) { - DPRINTK("lec: about to send %d bytes of data\n", data->len); - atm_force_charge(priv->lecd, data->truesize); - skb_queue_tail(&sk->sk_receive_queue, data); - sk->sk_data_ready(sk, skb->len); - } + if (data != NULL) { + DPRINTK("lec: about to send %d bytes of data\n", data->len); + atm_force_charge(priv->lecd, data->truesize); + skb_queue_tail(&sk->sk_receive_queue, data); + sk->sk_data_ready(sk, skb->len); + } - return 0; + return 0; } /* shamelessly stolen from drivers/net/net_init.c */ static int lec_change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || (new_mtu > 18190)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; + if ((new_mtu < 68) || (new_mtu > 18190)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; } static void lec_set_multicast_list(struct net_device *dev) { - /* by default, all multicast frames arrive over the bus. - * eventually support selective multicast service - */ - return; + /* + * by default, all multicast frames arrive over the bus. + * eventually support selective multicast service + */ + return; } -static void -lec_init(struct net_device *dev) +static void lec_init(struct net_device *dev) { - dev->change_mtu = lec_change_mtu; - dev->open = lec_open; - dev->stop = lec_close; - dev->hard_start_xmit = lec_start_xmit; + dev->change_mtu = lec_change_mtu; + dev->open = lec_open; + dev->stop = lec_close; + dev->hard_start_xmit = lec_start_xmit; dev->tx_timeout = lec_tx_timeout; - dev->get_stats = lec_get_stats; - dev->set_multicast_list = lec_set_multicast_list; - dev->do_ioctl = NULL; - printk("%s: Initialized!\n",dev->name); - return; + dev->get_stats = lec_get_stats; + dev->set_multicast_list = lec_set_multicast_list; + dev->do_ioctl = NULL; + printk("%s: Initialized!\n", dev->name); + return; } static unsigned char lec_ctrl_magic[] = { - 0xff, - 0x00, - 0x01, - 0x01 }; + 0xff, + 0x00, + 0x01, + 0x01 +}; #define LEC_DATA_DIRECT_8023 2 #define LEC_DATA_DIRECT_8025 3 static int lec_is_data_direct(struct atm_vcc *vcc) -{ +{ return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) || (vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025)); -} +} -static void -lec_push(struct atm_vcc *vcc, struct sk_buff *skb) +static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct net_device *dev = (struct net_device *)vcc->proto_data; - struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct net_device *dev = (struct net_device *)vcc->proto_data; + struct lec_priv *priv = (struct lec_priv *)dev->priv; #if DUMP_PACKETS >0 - int i=0; - char buf[300]; + int i = 0; + char buf[300]; - printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name, - vcc->vpi, vcc->vci); + printk("%s: lec_push vcc vpi:%d vci:%d\n", dev->name, + vcc->vpi, vcc->vci); #endif - if (!skb) { - DPRINTK("%s: null skb\n",dev->name); - lec_vcc_close(priv, vcc); - return; - } + if (!skb) { + DPRINTK("%s: null skb\n", dev->name); + lec_vcc_close(priv, vcc); + return; + } #if DUMP_PACKETS > 0 - printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name, - skb->len, priv->lecid); + printk("%s: rcv datalen:%ld lecid:%4.4x\n", dev->name, + skb->len, priv->lecid); #if DUMP_PACKETS >= 2 - for(i=0;i<skb->len && i <99;i++) { - sprintf(buf+i*3,"%2.2x ",0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 99; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #elif DUMP_PACKETS >= 1 - for(i=0;i<skb->len && i < 30;i++) { - sprintf(buf+i*3,"%2.2x ", 0xff&skb->data[i]); - } + for (i = 0; i < skb->len && i < 30; i++) { + sprintf(buf + i * 3, "%2.2x ", 0xff & skb->data[i]); + } #endif /* DUMP_PACKETS >= 1 */ - if (i==skb->len) - printk("%s\n",buf); - else - printk("%s...\n",buf); + if (i == skb->len) + printk("%s\n", buf); + else + printk("%s...\n", buf); #endif /* DUMP_PACKETS > 0 */ - if (memcmp(skb->data, lec_ctrl_magic, 4) ==0) { /* Control frame, to daemon*/ + if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); - DPRINTK("%s: To daemon\n",dev->name); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } else { /* Data frame, queue to protocol handlers */ + DPRINTK("%s: To daemon\n", dev->name); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); + } else { /* Data frame, queue to protocol handlers */ struct lec_arp_table *entry; - unsigned char *src, *dst; - - atm_return(vcc,skb->truesize); - if (*(uint16_t *)skb->data == htons(priv->lecid) || - !priv->lecd || - !(dev->flags & IFF_UP)) { - /* Probably looping back, or if lecd is missing, - lecd has gone down */ - DPRINTK("Ignoring frame...\n"); - dev_kfree_skb(skb); - return; - } + unsigned char *src, *dst; + + atm_return(vcc, skb->truesize); + if (*(uint16_t *) skb->data == htons(priv->lecid) || + !priv->lecd || !(dev->flags & IFF_UP)) { + /* + * Probably looping back, or if lecd is missing, + * lecd has gone down + */ + DPRINTK("Ignoring frame...\n"); + dev_kfree_skb(skb); + return; + } #ifdef CONFIG_TR - if (priv->is_trdev) - dst = ((struct lecdatahdr_8025 *) skb->data)->h_dest; - else + if (priv->is_trdev) + dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest; + else #endif - dst = ((struct lecdatahdr_8023 *) skb->data)->h_dest; + dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; - /* If this is a Data Direct VCC, and the VCC does not match + /* + * If this is a Data Direct VCC, and the VCC does not match * the LE_ARP cache entry, delete the LE_ARP cache entry. */ spin_lock_irqsave(&priv->lec_arp_lock, flags); if (lec_is_data_direct(vcc)) { #ifdef CONFIG_TR if (priv->is_trdev) - src = ((struct lecdatahdr_8025 *) skb->data)->h_source; + src = + ((struct lecdatahdr_8025 *)skb->data)-> + h_source; else #endif - src = ((struct lecdatahdr_8023 *) skb->data)->h_source; + src = + ((struct lecdatahdr_8023 *)skb->data)-> + h_source; entry = lec_arp_find(priv, src); if (entry && entry->vcc != vcc) { lec_arp_remove(priv, entry); - kfree(entry); + lec_arp_put(entry); } } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */ - !priv->is_proxy && /* Proxy wants all the packets */ + if (!(dst[0] & 0x01) && /* Never filter Multi/Broadcast */ + !priv->is_proxy && /* Proxy wants all the packets */ memcmp(dst, dev->dev_addr, dev->addr_len)) { - dev_kfree_skb(skb); - return; - } - if (priv->lec_arp_empty_ones) { - lec_arp_check_empties(priv, vcc, skb); - } - skb->dev = dev; - skb_pull(skb, 2); /* skip lec_id */ + dev_kfree_skb(skb); + return; + } + if (!hlist_empty(&priv->lec_arp_empty_ones)) { + lec_arp_check_empties(priv, vcc, skb); + } + skb->dev = dev; + skb_pull(skb, 2); /* skip lec_id */ #ifdef CONFIG_TR - if (priv->is_trdev) skb->protocol = tr_type_trans(skb, dev); - else + if (priv->is_trdev) + skb->protocol = tr_type_trans(skb, dev); + else #endif - skb->protocol = eth_type_trans(skb, dev); - priv->stats.rx_packets++; - priv->stats.rx_bytes += skb->len; - memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); - netif_rx(skb); - } + skb->protocol = eth_type_trans(skb, dev); + priv->stats.rx_packets++; + priv->stats.rx_bytes += skb->len; + memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); + netif_rx(skb); + } } -static void -lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) +static void lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) { struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); struct net_device *dev = skb->dev; @@ -821,123 +859,121 @@ lec_pop(struct atm_vcc *vcc, struct sk_buff *skb) } } -static int -lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) +static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) { struct lec_vcc_priv *vpriv; - int bytes_left; - struct atmlec_ioc ioc_data; - - /* Lecd must be up in this case */ - bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); - if (bytes_left != 0) { - printk("lec: lec_vcc_attach, copy from user failed for %d bytes\n", - bytes_left); - } - if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || - !dev_lec[ioc_data.dev_num]) - return -EINVAL; + int bytes_left; + struct atmlec_ioc ioc_data; + + /* Lecd must be up in this case */ + bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); + if (bytes_left != 0) { + printk + ("lec: lec_vcc_attach, copy from user failed for %d bytes\n", + bytes_left); + } + if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || + !dev_lec[ioc_data.dev_num]) + return -EINVAL; if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; vcc->pop = lec_pop; - lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, - &ioc_data, vcc, vcc->push); - vcc->proto_data = dev_lec[ioc_data.dev_num]; - vcc->push = lec_push; - return 0; + lec_vcc_added(dev_lec[ioc_data.dev_num]->priv, + &ioc_data, vcc, vcc->push); + vcc->proto_data = dev_lec[ioc_data.dev_num]; + vcc->push = lec_push; + return 0; } -static int -lec_mcast_attach(struct atm_vcc *vcc, int arg) +static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg <0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) - return -EINVAL; - vcc->proto_data = dev_lec[arg]; - return (lec_mcast_make((struct lec_priv*)dev_lec[arg]->priv, vcc)); + if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + return -EINVAL; + vcc->proto_data = dev_lec[arg]; + return (lec_mcast_make((struct lec_priv *)dev_lec[arg]->priv, vcc)); } /* Initialize device. */ -static int -lecd_attach(struct atm_vcc *vcc, int arg) -{ - int i; - struct lec_priv *priv; - - if (arg<0) - i = 0; - else - i = arg; +static int lecd_attach(struct atm_vcc *vcc, int arg) +{ + int i; + struct lec_priv *priv; + + if (arg < 0) + i = 0; + else + i = arg; #ifdef CONFIG_TR - if (arg >= MAX_LEC_ITF) - return -EINVAL; -#else /* Reserve the top NUM_TR_DEVS for TR */ - if (arg >= (MAX_LEC_ITF-NUM_TR_DEVS)) - return -EINVAL; + if (arg >= MAX_LEC_ITF) + return -EINVAL; +#else /* Reserve the top NUM_TR_DEVS for TR */ + if (arg >= (MAX_LEC_ITF - NUM_TR_DEVS)) + return -EINVAL; #endif - if (!dev_lec[i]) { - int is_trdev, size; + if (!dev_lec[i]) { + int is_trdev, size; - is_trdev = 0; - if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) - is_trdev = 1; + is_trdev = 0; + if (i >= (MAX_LEC_ITF - NUM_TR_DEVS)) + is_trdev = 1; - size = sizeof(struct lec_priv); + size = sizeof(struct lec_priv); #ifdef CONFIG_TR - if (is_trdev) - dev_lec[i] = alloc_trdev(size); - else + if (is_trdev) + dev_lec[i] = alloc_trdev(size); + else #endif - dev_lec[i] = alloc_etherdev(size); - if (!dev_lec[i]) - return -ENOMEM; - snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); - if (register_netdev(dev_lec[i])) { - free_netdev(dev_lec[i]); - return -EINVAL; - } - - priv = dev_lec[i]->priv; - priv->is_trdev = is_trdev; - lec_init(dev_lec[i]); - } else { - priv = dev_lec[i]->priv; - if (priv->lecd) - return -EADDRINUSE; - } - lec_arp_init(priv); - priv->itfnum = i; /* LANE2 addition */ - priv->lecd = vcc; - vcc->dev = &lecatm_dev; - vcc_insert_socket(sk_atm(vcc)); - - vcc->proto_data = dev_lec[i]; - set_bit(ATM_VF_META,&vcc->flags); - set_bit(ATM_VF_READY,&vcc->flags); - - /* Set default values to these variables */ - priv->maximum_unknown_frame_count = 1; - priv->max_unknown_frame_time = (1*HZ); - priv->vcc_timeout_period = (1200*HZ); - priv->max_retry_count = 1; - priv->aging_time = (300*HZ); - priv->forward_delay_time = (15*HZ); - priv->topology_change = 0; - priv->arp_response_time = (1*HZ); - priv->flush_timeout = (4*HZ); - priv->path_switching_delay = (6*HZ); - - if (dev_lec[i]->flags & IFF_UP) { - netif_start_queue(dev_lec[i]); - } - __module_get(THIS_MODULE); - return i; + dev_lec[i] = alloc_etherdev(size); + if (!dev_lec[i]) + return -ENOMEM; + snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); + if (register_netdev(dev_lec[i])) { + free_netdev(dev_lec[i]); + return -EINVAL; + } + + priv = dev_lec[i]->priv; + priv->is_trdev = is_trdev; + lec_init(dev_lec[i]); + } else { + priv = dev_lec[i]->priv; + if (priv->lecd) + return -EADDRINUSE; + } + lec_arp_init(priv); + priv->itfnum = i; /* LANE2 addition */ + priv->lecd = vcc; + vcc->dev = &lecatm_dev; + vcc_insert_socket(sk_atm(vcc)); + + vcc->proto_data = dev_lec[i]; + set_bit(ATM_VF_META, &vcc->flags); + set_bit(ATM_VF_READY, &vcc->flags); + + /* Set default values to these variables */ + priv->maximum_unknown_frame_count = 1; + priv->max_unknown_frame_time = (1 * HZ); + priv->vcc_timeout_period = (1200 * HZ); + priv->max_retry_count = 1; + priv->aging_time = (300 * HZ); + priv->forward_delay_time = (15 * HZ); + priv->topology_change = 0; + priv->arp_response_time = (1 * HZ); + priv->flush_timeout = (4 * HZ); + priv->path_switching_delay = (6 * HZ); + + if (dev_lec[i]->flags & IFF_UP) { + netif_start_queue(dev_lec[i]); + } + __module_get(THIS_MODULE); + return i; } #ifdef CONFIG_PROC_FS -static char* lec_arp_get_status_string(unsigned char status) +static char *lec_arp_get_status_string(unsigned char status) { static char *lec_arp_status_string[] = { "ESI_UNKNOWN ", @@ -967,52 +1003,54 @@ static void lec_info(struct seq_file *seq, struct lec_arp_table *entry) if (entry->vcc) seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci); else - seq_printf(seq, " "); + seq_printf(seq, " "); if (entry->recv_vcc) { seq_printf(seq, " %3d %3d", entry->recv_vcc->vpi, entry->recv_vcc->vci); - } - seq_putc(seq, '\n'); + } + seq_putc(seq, '\n'); } - struct lec_state { unsigned long flags; struct lec_priv *locked; - struct lec_arp_table *entry; + struct hlist_node *node; struct net_device *dev; int itf; int arp_table; int misc_table; }; -static void *lec_tbl_walk(struct lec_state *state, struct lec_arp_table *tbl, +static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, loff_t *l) { - struct lec_arp_table *e = state->entry; + struct hlist_node *e = state->node; + struct lec_arp_table *tmp; if (!e) - e = tbl; + e = tbl->first; if (e == (void *)1) { - e = tbl; + e = tbl->first; --*l; } - for (; e; e = e->next) { + + hlist_for_each_entry_from(tmp, e, next) { if (--*l < 0) break; } - state->entry = e; + state->node = e; + return (*l < 0) ? state : NULL; } static void *lec_arp_walk(struct lec_state *state, loff_t *l, - struct lec_priv *priv) + struct lec_priv *priv) { void *v = NULL; int p; for (p = state->arp_table; p < LEC_ARP_TABLE_SIZE; p++) { - v = lec_tbl_walk(state, priv->lec_arp_tables[p], l); + v = lec_tbl_walk(state, &priv->lec_arp_tables[p], l); if (v) break; } @@ -1023,10 +1061,10 @@ static void *lec_arp_walk(struct lec_state *state, loff_t *l, static void *lec_misc_walk(struct lec_state *state, loff_t *l, struct lec_priv *priv) { - struct lec_arp_table *lec_misc_tables[] = { - priv->lec_arp_empty_ones, - priv->lec_no_forward, - priv->mcast_fwds + struct hlist_head *lec_misc_tables[] = { + &priv->lec_arp_empty_ones, + &priv->lec_no_forward, + &priv->mcast_fwds }; void *v = NULL; int q; @@ -1047,8 +1085,7 @@ static void *lec_priv_walk(struct lec_state *state, loff_t *l, state->locked = priv; spin_lock_irqsave(&priv->lec_arp_lock, state->flags); } - if (!lec_arp_walk(state, l, priv) && - !lec_misc_walk(state, l, priv)) { + if (!lec_arp_walk(state, l, priv) && !lec_misc_walk(state, l, priv)) { spin_unlock_irqrestore(&priv->lec_arp_lock, state->flags); state->locked = NULL; /* Partial state reset for the next time we get called */ @@ -1082,7 +1119,7 @@ static void *lec_get_idx(struct lec_state *state, loff_t l) if (v) break; } - return v; + return v; } static void *lec_seq_start(struct seq_file *seq, loff_t *pos) @@ -1094,9 +1131,9 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) state->locked = NULL; state->arp_table = 0; state->misc_table = 0; - state->entry = (void *)1; + state->node = (void *)1; - return *pos ? lec_get_idx(state, *pos) : (void*)1; + return *pos ? lec_get_idx(state, *pos) : (void *)1; } static void lec_seq_stop(struct seq_file *seq, void *v) @@ -1121,27 +1158,28 @@ static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int lec_seq_show(struct seq_file *seq, void *v) { - static char lec_banner[] = "Itf MAC ATM destination" - " Status Flags " - "VPI/VCI Recv VPI/VCI\n"; + static char lec_banner[] = "Itf MAC ATM destination" + " Status Flags " + "VPI/VCI Recv VPI/VCI\n"; if (v == (void *)1) seq_puts(seq, lec_banner); else { struct lec_state *state = seq->private; - struct net_device *dev = state->dev; + struct net_device *dev = state->dev; + struct lec_arp_table *entry = hlist_entry(state->node, struct lec_arp_table, next); seq_printf(seq, "%s ", dev->name); - lec_info(seq, state->entry); + lec_info(seq, entry); } return 0; } static struct seq_operations lec_seq_ops = { - .start = lec_seq_start, - .next = lec_seq_next, - .stop = lec_seq_stop, - .show = lec_seq_show, + .start = lec_seq_start, + .next = lec_seq_next, + .stop = lec_seq_stop, + .show = lec_seq_show, }; static int lec_seq_open(struct inode *inode, struct file *file) @@ -1175,11 +1213,11 @@ static int lec_seq_release(struct inode *inode, struct file *file) } static struct file_operations lec_seq_fops = { - .owner = THIS_MODULE, - .open = lec_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = lec_seq_release, + .owner = THIS_MODULE, + .open = lec_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = lec_seq_release, }; #endif @@ -1187,38 +1225,38 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); int err = 0; - + switch (cmd) { - case ATMLEC_CTRL: - case ATMLEC_MCAST: - case ATMLEC_DATA: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - default: - return -ENOIOCTLCMD; + case ATMLEC_CTRL: + case ATMLEC_MCAST: + case ATMLEC_DATA: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + return -ENOIOCTLCMD; } switch (cmd) { - case ATMLEC_CTRL: - err = lecd_attach(vcc, (int) arg); - if (err >= 0) - sock->state = SS_CONNECTED; - break; - case ATMLEC_MCAST: - err = lec_mcast_attach(vcc, (int) arg); - break; - case ATMLEC_DATA: - err = lec_vcc_attach(vcc, (void __user *) arg); - break; + case ATMLEC_CTRL: + err = lecd_attach(vcc, (int)arg); + if (err >= 0) + sock->state = SS_CONNECTED; + break; + case ATMLEC_MCAST: + err = lec_mcast_attach(vcc, (int)arg); + break; + case ATMLEC_DATA: + err = lec_vcc_attach(vcc, (void __user *)arg); + break; } return err; } static struct atm_ioctl lane_ioctl_ops = { - .owner = THIS_MODULE, - .ioctl = lane_ioctl, + .owner = THIS_MODULE, + .ioctl = lane_ioctl, }; static int __init lane_module_init(void) @@ -1232,29 +1270,29 @@ static int __init lane_module_init(void) #endif register_atm_ioctl(&lane_ioctl_ops); - printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); - return 0; + printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); + return 0; } static void __exit lane_module_cleanup(void) { - int i; - struct lec_priv *priv; + int i; + struct lec_priv *priv; remove_proc_entry("lec", atm_proc_root); deregister_atm_ioctl(&lane_ioctl_ops); - for (i = 0; i < MAX_LEC_ITF; i++) { - if (dev_lec[i] != NULL) { - priv = (struct lec_priv *)dev_lec[i]->priv; + for (i = 0; i < MAX_LEC_ITF; i++) { + if (dev_lec[i] != NULL) { + priv = (struct lec_priv *)dev_lec[i]->priv; unregister_netdev(dev_lec[i]); - free_netdev(dev_lec[i]); - dev_lec[i] = NULL; - } - } + free_netdev(dev_lec[i]); + dev_lec[i] = NULL; + } + } - return; + return; } module_init(lane_module_init); @@ -1268,34 +1306,34 @@ module_exit(lane_module_cleanup); * If dst_mac == NULL, targetless LE_ARP will be sent */ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs) + u8 **tlvs, u32 *sizeoftlvs) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)dev->priv; - struct lec_arp_table *table; - struct sk_buff *skb; - int retval; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + struct lec_arp_table *table; + struct sk_buff *skb; + int retval; - if (force == 0) { + if (force == 0) { spin_lock_irqsave(&priv->lec_arp_lock, flags); - table = lec_arp_find(priv, dst_mac); + table = lec_arp_find(priv, dst_mac); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - if(table == NULL) - return -1; - - *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); - if (*tlvs == NULL) - return -1; - - memcpy(*tlvs, table->tlvs, table->sizeoftlvs); - *sizeoftlvs = table->sizeoftlvs; - - return 0; - } + if (table == NULL) + return -1; + + *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); + if (*tlvs == NULL) + return -1; + + memcpy(*tlvs, table->tlvs, table->sizeoftlvs); + *sizeoftlvs = table->sizeoftlvs; + + return 0; + } if (sizeoftlvs == NULL) retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, NULL); - + else { skb = alloc_skb(*sizeoftlvs, GFP_ATOMIC); if (skb == NULL) @@ -1304,9 +1342,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, memcpy(skb->data, *tlvs, *sizeoftlvs); retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb); } - return retval; -} - + return retval; +} /* * LANE2: 3.1.4, LE_ASSOCIATE.request @@ -1315,80 +1352,85 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, * Returns 1 for success, 0 for failure (out of memory) * */ -static int lane2_associate_req (struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs) +static int lane2_associate_req(struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs) { - int retval; - struct sk_buff *skb; - struct lec_priv *priv = (struct lec_priv*)dev->priv; - - if (compare_ether_addr(lan_dst, dev->dev_addr)) - return (0); /* not our mac address */ - - kfree(priv->tlvs); /* NULL if there was no previous association */ - - priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); - if (priv->tlvs == NULL) - return (0); - priv->sizeoftlvs = sizeoftlvs; - memcpy(priv->tlvs, tlvs, sizeoftlvs); - - skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); - if (skb == NULL) - return 0; - skb->len = sizeoftlvs; - memcpy(skb->data, tlvs, sizeoftlvs); - retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); - if (retval != 0) - printk("lec.c: lane2_associate_req() failed\n"); - /* If the previous association has changed we must - * somehow notify other LANE entities about the change - */ - return (1); + int retval; + struct sk_buff *skb; + struct lec_priv *priv = (struct lec_priv *)dev->priv; + + if (compare_ether_addr(lan_dst, dev->dev_addr)) + return (0); /* not our mac address */ + + kfree(priv->tlvs); /* NULL if there was no previous association */ + + priv->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + if (priv->tlvs == NULL) + return (0); + priv->sizeoftlvs = sizeoftlvs; + memcpy(priv->tlvs, tlvs, sizeoftlvs); + + skb = alloc_skb(sizeoftlvs, GFP_ATOMIC); + if (skb == NULL) + return 0; + skb->len = sizeoftlvs; + memcpy(skb->data, tlvs, sizeoftlvs); + retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); + if (retval != 0) + printk("lec.c: lane2_associate_req() failed\n"); + /* + * If the previous association has changed we must + * somehow notify other LANE entities about the change + */ + return (1); } /* * LANE2: 3.1.5, LE_ASSOCIATE.indication * */ -static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs) +static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr, + u8 *tlvs, u32 sizeoftlvs) { #if 0 - int i = 0; + int i = 0; #endif struct lec_priv *priv = (struct lec_priv *)dev->priv; -#if 0 /* Why have the TLVs in LE_ARP entries since we do not use them? When you - uncomment this code, make sure the TLVs get freed when entry is killed */ - struct lec_arp_table *entry = lec_arp_find(priv, mac_addr); +#if 0 /* + * Why have the TLVs in LE_ARP entries + * since we do not use them? When you + * uncomment this code, make sure the + * TLVs get freed when entry is killed + */ + struct lec_arp_table *entry = lec_arp_find(priv, mac_addr); - if (entry == NULL) - return; /* should not happen */ + if (entry == NULL) + return; /* should not happen */ - kfree(entry->tlvs); + kfree(entry->tlvs); - entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); - if (entry->tlvs == NULL) - return; + entry->tlvs = kmalloc(sizeoftlvs, GFP_KERNEL); + if (entry->tlvs == NULL) + return; - entry->sizeoftlvs = sizeoftlvs; - memcpy(entry->tlvs, tlvs, sizeoftlvs); + entry->sizeoftlvs = sizeoftlvs; + memcpy(entry->tlvs, tlvs, sizeoftlvs); #endif #if 0 - printk("lec.c: lane2_associate_ind()\n"); - printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); - while (i < sizeoftlvs) - printk("%02x ", tlvs[i++]); - - printk("\n"); + printk("lec.c: lane2_associate_ind()\n"); + printk("dump of tlvs, sizeoftlvs=%d\n", sizeoftlvs); + while (i < sizeoftlvs) + printk("%02x ", tlvs[i++]); + + printk("\n"); #endif - /* tell MPOA about the TLVs we saw */ - if (priv->lane2_ops && priv->lane2_ops->associate_indicator) { - priv->lane2_ops->associate_indicator(dev, mac_addr, - tlvs, sizeoftlvs); - } - return; + /* tell MPOA about the TLVs we saw */ + if (priv->lane2_ops && priv->lane2_ops->associate_indicator) { + priv->lane2_ops->associate_indicator(dev, mac_addr, + tlvs, sizeoftlvs); + } + return; } /* @@ -1396,7 +1438,6 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr, * * lec_arpc.c was added here when making * lane client modular. October 1997 - * */ #include <linux/types.h> @@ -1407,7 +1448,6 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr, #include <linux/inetdevice.h> #include <net/route.h> - #if 0 #define DPRINTK(format,args...) /* @@ -1418,7 +1458,7 @@ static void lane2_associate_ind (struct net_device *dev, u8 *mac_addr, #define LEC_ARP_REFRESH_INTERVAL (3*HZ) -static void lec_arp_check_expire(unsigned long data); +static void lec_arp_check_expire(void *data); static void lec_arp_expire_arp(unsigned long data); /* @@ -1430,475 +1470,397 @@ static void lec_arp_expire_arp(unsigned long data); /* * Initialization of arp-cache */ -static void -lec_arp_init(struct lec_priv *priv) +static void lec_arp_init(struct lec_priv *priv) { - unsigned short i; + unsigned short i; - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - priv->lec_arp_tables[i] = NULL; - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); + } + INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); + INIT_HLIST_HEAD(&priv->lec_no_forward); + INIT_HLIST_HEAD(&priv->mcast_fwds); spin_lock_init(&priv->lec_arp_lock); - init_timer(&priv->lec_arp_timer); - priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL; - priv->lec_arp_timer.data = (unsigned long)priv; - priv->lec_arp_timer.function = lec_arp_check_expire; - add_timer(&priv->lec_arp_timer); + INIT_WORK(&priv->lec_arp_work, lec_arp_check_expire, priv); + schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); } -static void -lec_arp_clear_vccs(struct lec_arp_table *entry) +static void lec_arp_clear_vccs(struct lec_arp_table *entry) { - if (entry->vcc) { + if (entry->vcc) { struct atm_vcc *vcc = entry->vcc; struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); - struct net_device *dev = (struct net_device*) vcc->proto_data; + struct net_device *dev = (struct net_device *)vcc->proto_data; - vcc->pop = vpriv->old_pop; + vcc->pop = vpriv->old_pop; if (vpriv->xoff) netif_wake_queue(dev); kfree(vpriv); vcc->user_back = NULL; - vcc->push = entry->old_push; + vcc->push = entry->old_push; vcc_release_async(vcc, -EPIPE); - vcc = NULL; - } - if (entry->recv_vcc) { - entry->recv_vcc->push = entry->old_recv_push; + entry->vcc = NULL; + } + if (entry->recv_vcc) { + entry->recv_vcc->push = entry->old_recv_push; vcc_release_async(entry->recv_vcc, -EPIPE); - entry->recv_vcc = NULL; - } + entry->recv_vcc = NULL; + } } /* * Insert entry to lec_arp_table * LANE2: Add to the end of the list to satisfy 8.1.13 */ -static inline void -lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add) +static inline void +lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) { - unsigned short place; - struct lec_arp_table *tmp; - - place = HASH(to_add->mac_addr[ETH_ALEN-1]); - tmp = priv->lec_arp_tables[place]; - to_add->next = NULL; - if (tmp == NULL) - priv->lec_arp_tables[place] = to_add; - - else { /* add to the end */ - while (tmp->next) - tmp = tmp->next; - tmp->next = to_add; - } - - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1], - 0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3], - 0xff&to_add->mac_addr[4], 0xff&to_add->mac_addr[5]); + struct hlist_head *tmp; + + tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; + hlist_add_head(&entry->next, tmp); + + DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], + 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], + 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); } /* * Remove entry from lec_arp_table */ -static int -lec_arp_remove(struct lec_priv *priv, - struct lec_arp_table *to_remove) +static int +lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - unsigned short place; - struct lec_arp_table *tmp; - int remove_vcc=1; - - if (!to_remove) { - return -1; - } - place = HASH(to_remove->mac_addr[ETH_ALEN-1]); - tmp = priv->lec_arp_tables[place]; - if (tmp == to_remove) { - priv->lec_arp_tables[place] = tmp->next; - } else { - while(tmp && tmp->next != to_remove) { - tmp = tmp->next; - } - if (!tmp) {/* Entry was not found */ - return -1; - } - } - tmp->next = to_remove->next; - del_timer(&to_remove->timer); - - /* If this is the only MAC connected to this VCC, also tear down - the VCC */ - if (to_remove->status >= ESI_FLUSH_PENDING) { - /* - * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT - */ - for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) { - for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) { - if (memcmp(tmp->atm_addr, to_remove->atm_addr, - ATM_ESA_LEN)==0) { - remove_vcc=0; - break; - } - } - } - if (remove_vcc) - lec_arp_clear_vccs(to_remove); - } - skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - 0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1], - 0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3], - 0xff&to_remove->mac_addr[4], 0xff&to_remove->mac_addr[5]); - return 0; + struct hlist_node *node; + struct lec_arp_table *entry; + int i, remove_vcc = 1; + + if (!to_remove) { + return -1; + } + + hlist_del(&to_remove->next); + del_timer(&to_remove->timer); + + /* If this is the only MAC connected to this VCC, also tear down the VCC */ + if (to_remove->status >= ESI_FLUSH_PENDING) { + /* + * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT + */ + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (memcmp(to_remove->atm_addr, + entry->atm_addr, ATM_ESA_LEN) == 0) { + remove_vcc = 0; + break; + } + } + } + if (remove_vcc) + lec_arp_clear_vccs(to_remove); + } + skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ + + DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], + 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], + 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); + return 0; } #if DEBUG_ARP_TABLE -static char* -get_status_string(unsigned char st) +static char *get_status_string(unsigned char st) { - switch(st) { - case ESI_UNKNOWN: - return "ESI_UNKNOWN"; - case ESI_ARP_PENDING: - return "ESI_ARP_PENDING"; - case ESI_VC_PENDING: - return "ESI_VC_PENDING"; - case ESI_FLUSH_PENDING: - return "ESI_FLUSH_PENDING"; - case ESI_FORWARD_DIRECT: - return "ESI_FORWARD_DIRECT"; - default: - return "<UNKNOWN>"; - } + switch (st) { + case ESI_UNKNOWN: + return "ESI_UNKNOWN"; + case ESI_ARP_PENDING: + return "ESI_ARP_PENDING"; + case ESI_VC_PENDING: + return "ESI_VC_PENDING"; + case ESI_FLUSH_PENDING: + return "ESI_FLUSH_PENDING"; + case ESI_FORWARD_DIRECT: + return "ESI_FORWARD_DIRECT"; + default: + return "<UNKNOWN>"; + } } -#endif -static void -dump_arp_table(struct lec_priv *priv) +static void dump_arp_table(struct lec_priv *priv) { -#if DEBUG_ARP_TABLE - int i,j, offset; - struct lec_arp_table *rulla; - char buf[1024]; - struct lec_arp_table **lec_arp_tables = - (struct lec_arp_table **)priv->lec_arp_tables; - struct lec_arp_table *lec_arp_empty_ones = - (struct lec_arp_table *)priv->lec_arp_empty_ones; - struct lec_arp_table *lec_no_forward = - (struct lec_arp_table *)priv->lec_no_forward; - struct lec_arp_table *mcast_fwds = priv->mcast_fwds; - - - printk("Dump %p:\n",priv); - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) { - rulla = lec_arp_tables[i]; - offset = 0; - offset += sprintf(buf,"%d: %p\n",i, rulla); - while (rulla) { - offset += sprintf(buf+offset,"Mac:"); - for(j=0;j<ETH_ALEN;j++) { - offset+=sprintf(buf+offset, - "%2.2x ", - rulla->mac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;j<ATM_ESA_LEN;j++) { - offset+=sprintf(buf+offset, - "%2.2x ", - rulla->atm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%p\n",rulla->next); - rulla = rulla->next; - } - printk("%s",buf); - } - rulla = lec_no_forward; - if (rulla) - printk("No forward\n"); - while(rulla) { - offset=0; - offset += sprintf(buf+offset,"Mac:"); - for(j=0;j<ETH_ALEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->mac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;j<ATM_ESA_LEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->atm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } - rulla = lec_arp_empty_ones; - if (rulla) - printk("Empty ones\n"); - while(rulla) { - offset=0; - offset += sprintf(buf+offset,"Mac:"); - for(j=0;j<ETH_ALEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->mac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;j<ATM_ESA_LEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->atm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } - - rulla = mcast_fwds; - if (rulla) - printk("Multicast Forward VCCs\n"); - while(rulla) { - offset=0; - offset += sprintf(buf+offset,"Mac:"); - for(j=0;j<ETH_ALEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->mac_addr[j]&0xff); - } - offset +=sprintf(buf+offset,"Atm:"); - for(j=0;j<ATM_ESA_LEN;j++) { - offset+=sprintf(buf+offset,"%2.2x ", - rulla->atm_addr[j]&0xff); - } - offset+=sprintf(buf+offset, - "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", - rulla->vcc?rulla->vcc->vpi:0, - rulla->vcc?rulla->vcc->vci:0, - rulla->recv_vcc?rulla->recv_vcc->vpi:0, - rulla->recv_vcc?rulla->recv_vcc->vci:0, - rulla->last_used, - rulla->timestamp, rulla->no_tries); - offset+=sprintf(buf+offset, - "Flags:%x, Packets_flooded:%x, Status: %s ", - rulla->flags, rulla->packets_flooded, - get_status_string(rulla->status)); - offset+=sprintf(buf+offset,"->%lx\n",(long)rulla->next); - rulla = rulla->next; - printk("%s",buf); - } + struct hlist_node *node; + struct lec_arp_table *rulla; + char buf[256]; + int i, j, offset; + + printk("Dump %p:\n", priv); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(rulla, node, &priv->lec_arp_tables[i], next) { + offset = 0; + offset += sprintf(buf, "%d: %p\n", i, rulla); + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, + "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, + "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc-> + vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc-> + vci : 0, rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += + sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } + } + + if (!hlist_empty(&priv->lec_no_forward)) + printk("No forward\n"); + hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } + + if (!hlist_empty(&priv->lec_arp_empty_ones)) + printk("Empty ones\n"); + hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s", buf); + } + + if (!hlist_empty(&priv->mcast_fwds)) + printk("Multicast Forward VCCs\n"); + hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + offset = 0; + offset += sprintf(buf + offset, "Mac:"); + for (j = 0; j < ETH_ALEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->mac_addr[j] & 0xff); + } + offset += sprintf(buf + offset, "Atm:"); + for (j = 0; j < ATM_ESA_LEN; j++) { + offset += sprintf(buf + offset, "%2.2x ", + rulla->atm_addr[j] & 0xff); + } + offset += sprintf(buf + offset, + "Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ", + rulla->vcc ? rulla->vcc->vpi : 0, + rulla->vcc ? rulla->vcc->vci : 0, + rulla->recv_vcc ? rulla->recv_vcc->vpi : 0, + rulla->recv_vcc ? rulla->recv_vcc->vci : 0, + rulla->last_used, + rulla->timestamp, rulla->no_tries); + offset += sprintf(buf + offset, + "Flags:%x, Packets_flooded:%x, Status: %s ", + rulla->flags, rulla->packets_flooded, + get_status_string(rulla->status)); + printk("%s\n", buf); + } -#endif } +#else +#define dump_arp_table(priv) do { } while (0) +#endif /* * Destruction of arp-cache */ -static void -lec_arp_destroy(struct lec_priv *priv) +static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; + + cancel_rearming_delayed_work(&priv->lec_arp_work); - del_timer_sync(&priv->lec_arp_timer); - - /* - * Remove all entries - */ + /* + * Remove all entries + */ spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) { - next = entry->next; - lec_arp_remove(priv, entry); - kfree(entry); - } - } - entry = priv->lec_arp_empty_ones; - while(entry) { - next = entry->next; - del_timer_sync(&entry->timer); - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->lec_arp_empty_ones = NULL; - entry = priv->lec_no_forward; - while(entry) { - next = entry->next; - del_timer_sync(&entry->timer); - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->lec_no_forward = NULL; - entry = priv->mcast_fwds; - while(entry) { - next = entry->next; - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - lec_arp_clear_vccs(entry); - kfree(entry); - entry = next; - } - priv->mcast_fwds = NULL; - priv->mcast_vcc = NULL; - memset(priv->lec_arp_tables, 0, - sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + lec_arp_remove(priv, entry); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); + } + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + del_timer_sync(&entry->timer); + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + del_timer_sync(&entry->timer); + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->lec_no_forward); + + hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ + lec_arp_clear_vccs(entry); + hlist_del(&entry->next); + lec_arp_put(entry); + } + INIT_HLIST_HEAD(&priv->mcast_fwds); + priv->mcast_vcc = NULL; spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } - /* * Find entry by mac_address */ -static struct lec_arp_table* -lec_arp_find(struct lec_priv *priv, - unsigned char *mac_addr) +static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, + unsigned char *mac_addr) { - unsigned short place; - struct lec_arp_table *to_return; - - DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", - mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff, - mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff); - place = HASH(mac_addr[ETH_ALEN-1]); - - to_return = priv->lec_arp_tables[place]; - while(to_return) { - if (!compare_ether_addr(mac_addr, to_return->mac_addr)) { - return to_return; - } - to_return = to_return->next; - } - return NULL; + struct hlist_node *node; + struct hlist_head *head; + struct lec_arp_table *entry; + + DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, + mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); + + head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; + hlist_for_each_entry(entry, node, head, next) { + if (!compare_ether_addr(mac_addr, entry->mac_addr)) { + return entry; + } + } + return NULL; } -static struct lec_arp_table* -make_entry(struct lec_priv *priv, unsigned char *mac_addr) +static struct lec_arp_table *make_entry(struct lec_priv *priv, + unsigned char *mac_addr) { - struct lec_arp_table *to_return; - - to_return = kmalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); - if (!to_return) { - printk("LEC: Arp entry kmalloc failed\n"); - return NULL; - } - memset(to_return, 0, sizeof(struct lec_arp_table)); - memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); - init_timer(&to_return->timer); - to_return->timer.function = lec_arp_expire_arp; - to_return->timer.data = (unsigned long) to_return; - to_return->last_used = jiffies; - to_return->priv = priv; - skb_queue_head_init(&to_return->tx_wait); - return to_return; + struct lec_arp_table *to_return; + + to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); + if (!to_return) { + printk("LEC: Arp entry kmalloc failed\n"); + return NULL; + } + memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); + INIT_HLIST_NODE(&to_return->next); + init_timer(&to_return->timer); + to_return->timer.function = lec_arp_expire_arp; + to_return->timer.data = (unsigned long)to_return; + to_return->last_used = jiffies; + to_return->priv = priv; + skb_queue_head_init(&to_return->tx_wait); + atomic_set(&to_return->usage, 1); + return to_return; } -/* - * - * Arp sent timer expired - * - */ -static void -lec_arp_expire_arp(unsigned long data) +/* Arp sent timer expired */ +static void lec_arp_expire_arp(unsigned long data) { - struct lec_arp_table *entry; - - entry = (struct lec_arp_table *)data; - - DPRINTK("lec_arp_expire_arp\n"); - if (entry->status == ESI_ARP_PENDING) { - if (entry->no_tries <= entry->priv->max_retry_count) { - if (entry->is_rdesc) - send_to_lecd(entry->priv, l_rdesc_arp_xmt, entry->mac_addr, NULL, NULL); - else - send_to_lecd(entry->priv, l_arp_xmt, entry->mac_addr, NULL, NULL); - entry->no_tries++; - } - mod_timer(&entry->timer, jiffies + (1*HZ)); - } + struct lec_arp_table *entry; + + entry = (struct lec_arp_table *)data; + + DPRINTK("lec_arp_expire_arp\n"); + if (entry->status == ESI_ARP_PENDING) { + if (entry->no_tries <= entry->priv->max_retry_count) { + if (entry->is_rdesc) + send_to_lecd(entry->priv, l_rdesc_arp_xmt, + entry->mac_addr, NULL, NULL); + else + send_to_lecd(entry->priv, l_arp_xmt, + entry->mac_addr, NULL, NULL); + entry->no_tries++; + } + mod_timer(&entry->timer, jiffies + (1 * HZ)); + } } -/* - * - * Unknown/unused vcc expire, remove associated entry - * - */ -static void -lec_arp_expire_vcc(unsigned long data) +/* Unknown/unused vcc expire, remove associated entry */ +static void lec_arp_expire_vcc(unsigned long data) { unsigned long flags; - struct lec_arp_table *to_remove = (struct lec_arp_table*)data; - struct lec_priv *priv = (struct lec_priv *)to_remove->priv; - struct lec_arp_table *entry = NULL; + struct lec_arp_table *to_remove = (struct lec_arp_table *)data; + struct lec_priv *priv = (struct lec_priv *)to_remove->priv; - del_timer(&to_remove->timer); + del_timer(&to_remove->timer); - DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", - to_remove, priv, - to_remove->vcc?to_remove->recv_vcc->vpi:0, - to_remove->vcc?to_remove->recv_vcc->vci:0); - DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward); + DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", + to_remove, priv, + to_remove->vcc ? to_remove->recv_vcc->vpi : 0, + to_remove->vcc ? to_remove->recv_vcc->vci : 0); spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (to_remove == priv->lec_arp_empty_ones) - priv->lec_arp_empty_ones = to_remove->next; - else { - entry = priv->lec_arp_empty_ones; - while (entry && entry->next != to_remove) - entry = entry->next; - if (entry) - entry->next = to_remove->next; - } - if (!entry) { - if (to_remove == priv->lec_no_forward) { - priv->lec_no_forward = to_remove->next; - } else { - entry = priv->lec_no_forward; - while (entry && entry->next != to_remove) - entry = entry->next; - if (entry) - entry->next = to_remove->next; - } - } + hlist_del(&to_remove->next); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - lec_arp_clear_vccs(to_remove); - kfree(to_remove); + lec_arp_clear_vccs(to_remove); + lec_arp_put(to_remove); } /* @@ -1917,158 +1879,171 @@ lec_arp_expire_vcc(unsigned long data) * to ESI_FORWARD_DIRECT. This causes the flush period to end * regardless of the progress of the flush protocol. */ -static void -lec_arp_check_expire(unsigned long data) +static void lec_arp_check_expire(void *data) { unsigned long flags; - struct lec_priv *priv = (struct lec_priv *)data; - struct lec_arp_table *entry, *next; - unsigned long now; - unsigned long time_to_check; - int i; - - DPRINTK("lec_arp_check_expire %p\n",priv); - DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones, - priv->lec_no_forward); + struct lec_priv *priv = data; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + unsigned long now; + unsigned long time_to_check; + int i; + + DPRINTK("lec_arp_check_expire %p\n", priv); now = jiffies; +restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; ) { - if ((entry->flags) & LEC_REMOTE_FLAG && + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) time_to_check = priv->forward_delay_time; else time_to_check = priv->aging_time; DPRINTK("About to expire: %lx - %lx > %lx\n", - now,entry->last_used, time_to_check); - if( time_after(now, entry->last_used+ - time_to_check) && - !(entry->flags & LEC_PERMANENT_FLAG) && - !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */ + now, entry->last_used, time_to_check); + if (time_after(now, entry->last_used + time_to_check) + && !(entry->flags & LEC_PERMANENT_FLAG) + && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ DPRINTK("LEC:Entry timed out\n"); - next = entry->next; lec_arp_remove(priv, entry); - kfree(entry); - entry = next; + lec_arp_put(entry); } else { /* Something else */ if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) + entry->status == ESI_ARP_PENDING) && time_after_eq(now, - entry->timestamp + - priv->max_unknown_frame_time)) { + entry->timestamp + + priv-> + max_unknown_frame_time)) { entry->timestamp = jiffies; entry->packets_flooded = 0; if (entry->status == ESI_VC_PENDING) - send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL); + send_to_lecd(priv, l_svc_setup, + entry->mac_addr, + entry->atm_addr, + NULL); } - if (entry->status == ESI_FLUSH_PENDING - && - time_after_eq(now, entry->timestamp+ - priv->path_switching_delay)) { + if (entry->status == ESI_FLUSH_PENDING + && + time_after_eq(now, entry->timestamp + + priv->path_switching_delay)) { struct sk_buff *skb; + struct atm_vcc *vcc = entry->vcc; + lec_arp_hold(entry); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(entry->vcc, skb, entry->priv); + lec_send(vcc, skb, entry->priv); entry->last_used = jiffies; - entry->status = - ESI_FORWARD_DIRECT; + entry->status = ESI_FORWARD_DIRECT; + lec_arp_put(entry); + goto restart; } - entry = entry->next; } } } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL); + schedule_delayed_work(&priv->lec_arp_work, LEC_ARP_REFRESH_INTERVAL); } + /* * Try to find vcc where mac_address is attached. * */ -static struct atm_vcc* -lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, - int is_rdesc, struct lec_arp_table **ret_entry) +static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, + unsigned char *mac_to_find, int is_rdesc, + struct lec_arp_table **ret_entry) { unsigned long flags; - struct lec_arp_table *entry; + struct lec_arp_table *entry; struct atm_vcc *found; - if (mac_to_find[0] & 0x01) { - switch (priv->lane_version) { - case 1: - return priv->mcast_vcc; - break; - case 2: /* LANE2 wants arp for multicast addresses */ - if (!compare_ether_addr(mac_to_find, bus_mac)) - return priv->mcast_vcc; - break; - default: - break; - } - } + if (mac_to_find[0] & 0x01) { + switch (priv->lane_version) { + case 1: + return priv->mcast_vcc; + break; + case 2: /* LANE2 wants arp for multicast addresses */ + if (!compare_ether_addr(mac_to_find, bus_mac)) + return priv->mcast_vcc; + break; + default: + break; + } + } spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_to_find); - - if (entry) { - if (entry->status == ESI_FORWARD_DIRECT) { - /* Connection Ok */ - entry->last_used = jiffies; - *ret_entry = entry; - found = entry->vcc; + entry = lec_arp_find(priv, mac_to_find); + + if (entry) { + if (entry->status == ESI_FORWARD_DIRECT) { + /* Connection Ok */ + entry->last_used = jiffies; + lec_arp_hold(entry); + *ret_entry = entry; + found = entry->vcc; goto out; - } - /* If the LE_ARP cache entry is still pending, reset count to 0 + } + /* + * If the LE_ARP cache entry is still pending, reset count to 0 * so another LE_ARP request can be made for this frame. */ if (entry->status == ESI_ARP_PENDING) { entry->no_tries = 0; } - /* Data direct VC not yet set up, check to see if the unknown - frame count is greater than the limit. If the limit has - not been reached, allow the caller to send packet to - BUS. */ - if (entry->status != ESI_FLUSH_PENDING && - entry->packets_flooded<priv->maximum_unknown_frame_count) { - entry->packets_flooded++; - DPRINTK("LEC_ARP: Flooding..\n"); - found = priv->mcast_vcc; + /* + * Data direct VC not yet set up, check to see if the unknown + * frame count is greater than the limit. If the limit has + * not been reached, allow the caller to send packet to + * BUS. + */ + if (entry->status != ESI_FLUSH_PENDING && + entry->packets_flooded < + priv->maximum_unknown_frame_count) { + entry->packets_flooded++; + DPRINTK("LEC_ARP: Flooding..\n"); + found = priv->mcast_vcc; goto out; - } - /* We got here because entry->status == ESI_FLUSH_PENDING + } + /* + * We got here because entry->status == ESI_FLUSH_PENDING * or BUS flood limit was reached for an entry which is * in ESI_ARP_PENDING or ESI_VC_PENDING state. */ - *ret_entry = entry; - DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); - found = NULL; - } else { - /* No matching entry was found */ - entry = make_entry(priv, mac_to_find); - DPRINTK("LEC_ARP: Making entry\n"); - if (!entry) { - found = priv->mcast_vcc; + lec_arp_hold(entry); + *ret_entry = entry; + DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, + entry->vcc); + found = NULL; + } else { + /* No matching entry was found */ + entry = make_entry(priv, mac_to_find); + DPRINTK("LEC_ARP: Making entry\n"); + if (!entry) { + found = priv->mcast_vcc; goto out; - } - lec_arp_add(priv, entry); - /* We want arp-request(s) to be sent */ - entry->packets_flooded =1; - entry->status = ESI_ARP_PENDING; - entry->no_tries = 1; - entry->last_used = entry->timestamp = jiffies; - entry->is_rdesc = is_rdesc; - if (entry->is_rdesc) - send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, NULL); - else - send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); - entry->timer.expires = jiffies + (1*HZ); - entry->timer.function = lec_arp_expire_arp; - add_timer(&entry->timer); - found = priv->mcast_vcc; - } + } + lec_arp_add(priv, entry); + /* We want arp-request(s) to be sent */ + entry->packets_flooded = 1; + entry->status = ESI_ARP_PENDING; + entry->no_tries = 1; + entry->last_used = entry->timestamp = jiffies; + entry->is_rdesc = is_rdesc; + if (entry->is_rdesc) + send_to_lecd(priv, l_rdesc_arp_xmt, mac_to_find, NULL, + NULL); + else + send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); + entry->timer.expires = jiffies + (1 * HZ); + entry->timer.function = lec_arp_expire_arp; + add_timer(&entry->timer); + found = priv->mcast_vcc; + } out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2076,30 +2051,30 @@ out: } static int -lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, - unsigned long permanent) +lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, + unsigned long permanent) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; - DPRINTK("lec_addr_delete\n"); + DPRINTK("lec_addr_delete\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) { - next = entry->next; - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) - && (permanent || - !(entry->flags & LEC_PERMANENT_FLAG))) { + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) + && (permanent || + !(entry->flags & LEC_PERMANENT_FLAG))) { lec_arp_remove(priv, entry); - kfree(entry); - } + lec_arp_put(entry); + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return 0; - } - } + return 0; + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return -1; + return -1; } /* @@ -2107,109 +2082,98 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, */ static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, - unsigned char *atm_addr, unsigned long remoteflag, - unsigned int targetless_le_arp) + unsigned char *atm_addr, unsigned long remoteflag, + unsigned int targetless_le_arp) { unsigned long flags; - struct lec_arp_table *entry, *tmp; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry, *tmp; + int i; - DPRINTK("lec:%s", (targetless_le_arp) ? "targetless ": " "); - DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3], - mac_addr[4],mac_addr[5]); + DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " "); + DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], + mac_addr[4], mac_addr[5]); spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = lec_arp_find(priv, mac_addr); - if (entry == NULL && targetless_le_arp) - goto out; /* LANE2: ignore targetless LE_ARPs for which - * we have no entry in the cache. 7.1.30 - */ - if (priv->lec_arp_empty_ones) { - entry = priv->lec_arp_empty_ones; - if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) { - priv->lec_arp_empty_ones = entry->next; - } else { - while(entry->next && memcmp(entry->next->atm_addr, - atm_addr, ATM_ESA_LEN)) - entry = entry->next; - if (entry->next) { - tmp = entry; - entry = entry->next; - tmp->next = entry->next; - } else - entry = NULL; - - } - if (entry) { - del_timer(&entry->timer); - tmp = lec_arp_find(priv, mac_addr); - if (tmp) { - del_timer(&tmp->timer); - tmp->status = ESI_FORWARD_DIRECT; - memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); - tmp->vcc = entry->vcc; - tmp->old_push = entry->old_push; - tmp->last_used = jiffies; - del_timer(&entry->timer); - kfree(entry); - entry=tmp; - } else { - entry->status = ESI_FORWARD_DIRECT; - memcpy(entry->mac_addr, mac_addr, ETH_ALEN); - entry->last_used = jiffies; - lec_arp_add(priv, entry); - } - if (remoteflag) - entry->flags|=LEC_REMOTE_FLAG; - else - entry->flags&=~LEC_REMOTE_FLAG; - DPRINTK("After update\n"); - dump_arp_table(priv); - goto out; - } - } - entry = lec_arp_find(priv, mac_addr); - if (!entry) { - entry = make_entry(priv, mac_addr); - if (!entry) + entry = lec_arp_find(priv, mac_addr); + if (entry == NULL && targetless_le_arp) + goto out; /* + * LANE2: ignore targetless LE_ARPs for which + * we have no entry in the cache. 7.1.30 + */ + if (!hlist_empty(&priv->lec_arp_empty_ones)) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { + hlist_del(&entry->next); + del_timer(&entry->timer); + tmp = lec_arp_find(priv, mac_addr); + if (tmp) { + del_timer(&tmp->timer); + tmp->status = ESI_FORWARD_DIRECT; + memcpy(tmp->atm_addr, atm_addr, ATM_ESA_LEN); + tmp->vcc = entry->vcc; + tmp->old_push = entry->old_push; + tmp->last_used = jiffies; + del_timer(&entry->timer); + lec_arp_put(entry); + entry = tmp; + } else { + entry->status = ESI_FORWARD_DIRECT; + memcpy(entry->mac_addr, mac_addr, ETH_ALEN); + entry->last_used = jiffies; + lec_arp_add(priv, entry); + } + if (remoteflag) + entry->flags |= LEC_REMOTE_FLAG; + else + entry->flags &= ~LEC_REMOTE_FLAG; + DPRINTK("After update\n"); + dump_arp_table(priv); + goto out; + } + } + } + + entry = lec_arp_find(priv, mac_addr); + if (!entry) { + entry = make_entry(priv, mac_addr); + if (!entry) goto out; - entry->status = ESI_UNKNOWN; - lec_arp_add(priv, entry); - /* Temporary, changes before end of function */ - } - memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); - del_timer(&entry->timer); - for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) { - if (entry != tmp && - !memcmp(tmp->atm_addr, atm_addr, - ATM_ESA_LEN)) { - /* Vcc to this host exists */ - if (tmp->status > ESI_VC_PENDING) { - /* - * ESI_FLUSH_PENDING, - * ESI_FORWARD_DIRECT - */ - entry->vcc = tmp->vcc; - entry->old_push=tmp->old_push; - } - entry->status=tmp->status; - break; - } - } - } - if (remoteflag) - entry->flags|=LEC_REMOTE_FLAG; - else - entry->flags&=~LEC_REMOTE_FLAG; - if (entry->status == ESI_ARP_PENDING || - entry->status == ESI_UNKNOWN) { - entry->status = ESI_VC_PENDING; - send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); - } - DPRINTK("After update2\n"); - dump_arp_table(priv); + entry->status = ESI_UNKNOWN; + lec_arp_add(priv, entry); + /* Temporary, changes before end of function */ + } + memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); + del_timer(&entry->timer); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(tmp, node, &priv->lec_arp_tables[i], next) { + if (entry != tmp && + !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { + /* Vcc to this host exists */ + if (tmp->status > ESI_VC_PENDING) { + /* + * ESI_FLUSH_PENDING, + * ESI_FORWARD_DIRECT + */ + entry->vcc = tmp->vcc; + entry->old_push = tmp->old_push; + } + entry->status = tmp->status; + break; + } + } + } + if (remoteflag) + entry->flags |= LEC_REMOTE_FLAG; + else + entry->flags &= ~LEC_REMOTE_FLAG; + if (entry->status == ESI_ARP_PENDING || entry->status == ESI_UNKNOWN) { + entry->status = ESI_VC_PENDING; + send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); + } + DPRINTK("After update2\n"); + dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } @@ -2219,299 +2183,299 @@ out: */ static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, - struct atm_vcc *vcc, - void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb)) + struct atm_vcc *vcc, + void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct lec_arp_table *entry; - int i, found_entry=0; + struct hlist_node *node; + struct lec_arp_table *entry; + int i, found_entry = 0; spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (ioc_data->receive == 2) { - /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ + if (ioc_data->receive == 2) { + /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - DPRINTK("LEC_ARP: Attaching mcast forward\n"); + DPRINTK("LEC_ARP: Attaching mcast forward\n"); #if 0 - entry = lec_arp_find(priv, bus_mac); - if (!entry) { - printk("LEC_ARP: Multicast entry not found!\n"); + entry = lec_arp_find(priv, bus_mac); + if (!entry) { + printk("LEC_ARP: Multicast entry not found!\n"); goto out; - } - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; + } + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; #endif - entry = make_entry(priv, bus_mac); - if (entry == NULL) + entry = make_entry(priv, bus_mac); + if (entry == NULL) goto out; - del_timer(&entry->timer); - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - entry->next = priv->mcast_fwds; - priv->mcast_fwds = entry; - goto out; - } else if (ioc_data->receive == 1) { - /* Vcc which we don't want to make default vcc, attach it - anyway. */ - DPRINTK("LEC_ARP:Attaching data direct, not default :%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0],ioc_data->atm_addr[1], - ioc_data->atm_addr[2],ioc_data->atm_addr[3], - ioc_data->atm_addr[4],ioc_data->atm_addr[5], - ioc_data->atm_addr[6],ioc_data->atm_addr[7], - ioc_data->atm_addr[8],ioc_data->atm_addr[9], - ioc_data->atm_addr[10],ioc_data->atm_addr[11], - ioc_data->atm_addr[12],ioc_data->atm_addr[13], - ioc_data->atm_addr[14],ioc_data->atm_addr[15], - ioc_data->atm_addr[16],ioc_data->atm_addr[17], - ioc_data->atm_addr[18],ioc_data->atm_addr[19]); - entry = make_entry(priv, bus_mac); - if (entry == NULL) + del_timer(&entry->timer); + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; + hlist_add_head(&entry->next, &priv->mcast_fwds); + goto out; + } else if (ioc_data->receive == 1) { + /* + * Vcc which we don't want to make default vcc, + * attach it anyway. + */ + DPRINTK + ("LEC_ARP:Attaching data direct, not default: " + "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + entry = make_entry(priv, bus_mac); + if (entry == NULL) goto out; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); - entry->recv_vcc = vcc; - entry->old_recv_push = old_push; - entry->status = ESI_UNKNOWN; - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - add_timer(&entry->timer); - entry->next = priv->lec_no_forward; - priv->lec_no_forward = entry; + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + memset(entry->mac_addr, 0, ETH_ALEN); + entry->recv_vcc = vcc; + entry->old_recv_push = old_push; + entry->status = ESI_UNKNOWN; + entry->timer.expires = jiffies + priv->vcc_timeout_period; + entry->timer.function = lec_arp_expire_vcc; + hlist_add_head(&entry->next, &priv->lec_no_forward); + add_timer(&entry->timer); dump_arp_table(priv); goto out; - } - DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", - ioc_data->atm_addr[0],ioc_data->atm_addr[1], - ioc_data->atm_addr[2],ioc_data->atm_addr[3], - ioc_data->atm_addr[4],ioc_data->atm_addr[5], - ioc_data->atm_addr[6],ioc_data->atm_addr[7], - ioc_data->atm_addr[8],ioc_data->atm_addr[9], - ioc_data->atm_addr[10],ioc_data->atm_addr[11], - ioc_data->atm_addr[12],ioc_data->atm_addr[13], - ioc_data->atm_addr[14],ioc_data->atm_addr[15], - ioc_data->atm_addr[16],ioc_data->atm_addr[17], - ioc_data->atm_addr[18],ioc_data->atm_addr[19]); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { - if (memcmp(ioc_data->atm_addr, entry->atm_addr, - ATM_ESA_LEN)==0) { - DPRINTK("LEC_ARP: Attaching data direct\n"); - DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", - entry->vcc?entry->vcc->vci:0, - entry->recv_vcc?entry->recv_vcc->vci:0); - found_entry=1; - del_timer(&entry->timer); - entry->vcc = vcc; - entry->old_push = old_push; - if (entry->status == ESI_VC_PENDING) { - if(priv->maximum_unknown_frame_count - ==0) - entry->status = - ESI_FORWARD_DIRECT; - else { - entry->timestamp = jiffies; - entry->status = - ESI_FLUSH_PENDING; + } + DPRINTK + ("LEC_ARP:Attaching data direct, default: " + "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + ioc_data->atm_addr[0], ioc_data->atm_addr[1], + ioc_data->atm_addr[2], ioc_data->atm_addr[3], + ioc_data->atm_addr[4], ioc_data->atm_addr[5], + ioc_data->atm_addr[6], ioc_data->atm_addr[7], + ioc_data->atm_addr[8], ioc_data->atm_addr[9], + ioc_data->atm_addr[10], ioc_data->atm_addr[11], + ioc_data->atm_addr[12], ioc_data->atm_addr[13], + ioc_data->atm_addr[14], ioc_data->atm_addr[15], + ioc_data->atm_addr[16], ioc_data->atm_addr[17], + ioc_data->atm_addr[18], ioc_data->atm_addr[19]); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (memcmp + (ioc_data->atm_addr, entry->atm_addr, + ATM_ESA_LEN) == 0) { + DPRINTK("LEC_ARP: Attaching data direct\n"); + DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", + entry->vcc ? entry->vcc->vci : 0, + entry->recv_vcc ? entry->recv_vcc-> + vci : 0); + found_entry = 1; + del_timer(&entry->timer); + entry->vcc = vcc; + entry->old_push = old_push; + if (entry->status == ESI_VC_PENDING) { + if (priv->maximum_unknown_frame_count + == 0) + entry->status = + ESI_FORWARD_DIRECT; + else { + entry->timestamp = jiffies; + entry->status = + ESI_FLUSH_PENDING; #if 0 - send_to_lecd(priv,l_flush_xmt, - NULL, - entry->atm_addr, - NULL); + send_to_lecd(priv, l_flush_xmt, + NULL, + entry->atm_addr, + NULL); #endif - } - } else { - /* They were forming a connection - to us, and we to them. Our - ATM address is numerically lower - than theirs, so we make connection - we formed into default VCC (8.1.11). - Connection they made gets torn - down. This might confuse some - clients. Can be changed if - someone reports trouble... */ - ; - } - } - } - } - if (found_entry) { - DPRINTK("After vcc was added\n"); - dump_arp_table(priv); + } + } else { + /* + * They were forming a connection + * to us, and we to them. Our + * ATM address is numerically lower + * than theirs, so we make connection + * we formed into default VCC (8.1.11). + * Connection they made gets torn + * down. This might confuse some + * clients. Can be changed if + * someone reports trouble... + */ + ; + } + } + } + } + if (found_entry) { + DPRINTK("After vcc was added\n"); + dump_arp_table(priv); goto out; - } - /* Not found, snatch address from first data packet that arrives from - this vcc */ - entry = make_entry(priv, bus_mac); - if (!entry) + } + /* + * Not found, snatch address from first data packet that arrives + * from this vcc + */ + entry = make_entry(priv, bus_mac); + if (!entry) goto out; - entry->vcc = vcc; - entry->old_push = old_push; - memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); - memset(entry->mac_addr, 0, ETH_ALEN); - entry->status = ESI_UNKNOWN; - entry->next = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = entry; - entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; - add_timer(&entry->timer); - DPRINTK("After vcc was added\n"); + entry->vcc = vcc; + entry->old_push = old_push; + memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); + memset(entry->mac_addr, 0, ETH_ALEN); + entry->status = ESI_UNKNOWN; + hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); + entry->timer.expires = jiffies + priv->vcc_timeout_period; + entry->timer.function = lec_arp_expire_vcc; + add_timer(&entry->timer); + DPRINTK("After vcc was added\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } -static void -lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) +static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct lec_arp_table *entry; - int i; - - DPRINTK("LEC:lec_flush_complete %lx\n",tran_id); + struct hlist_node *node; + struct lec_arp_table *entry; + int i; + + DPRINTK("LEC:lec_flush_complete %lx\n", tran_id); +restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { - if (entry->flush_tran_id == tran_id && - entry->status == ESI_FLUSH_PENDING) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(entry->vcc, skb, entry->priv); - entry->status = ESI_FORWARD_DIRECT; - DPRINTK("LEC_ARP: Flushed\n"); - } - } - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (entry->flush_tran_id == tran_id + && entry->status == ESI_FLUSH_PENDING) { + struct sk_buff *skb; + struct atm_vcc *vcc = entry->vcc; + + lec_arp_hold(entry); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) + lec_send(vcc, skb, entry->priv); + entry->last_used = jiffies; + entry->status = ESI_FORWARD_DIRECT; + lec_arp_put(entry); + DPRINTK("LEC_ARP: Flushed\n"); + goto restart; + } + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - dump_arp_table(priv); + dump_arp_table(priv); } static void lec_set_flush_tran_id(struct lec_priv *priv, - unsigned char *atm_addr, unsigned long tran_id) + unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct lec_arp_table *entry; - int i; + struct hlist_node *node; + struct lec_arp_table *entry; + int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); - for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next) - if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { - entry->flush_tran_id = tran_id; - DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry); - } + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) + hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { + if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { + entry->flush_tran_id = tran_id; + DPRINTK("Set flush transaction id to %lx for %p\n", + tran_id, entry); + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } -static int -lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) +static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - unsigned char mac_addr[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - struct lec_arp_table *to_add; + unsigned char mac_addr[] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + struct lec_arp_table *to_add; struct lec_vcc_priv *vpriv; int err = 0; - + if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) return -ENOMEM; vpriv->xoff = 0; vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; - vcc->pop = lec_pop; + vcc->pop = lec_pop; spin_lock_irqsave(&priv->lec_arp_lock, flags); - to_add = make_entry(priv, mac_addr); - if (!to_add) { + to_add = make_entry(priv, mac_addr); + if (!to_add) { vcc->pop = vpriv->old_pop; kfree(vpriv); - err = -ENOMEM; + err = -ENOMEM; goto out; - } - memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); - to_add->status = ESI_FORWARD_DIRECT; - to_add->flags |= LEC_PERMANENT_FLAG; - to_add->vcc = vcc; - to_add->old_push = vcc->push; - vcc->push = lec_push; - priv->mcast_vcc = vcc; - lec_arp_add(priv, to_add); + } + memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); + to_add->status = ESI_FORWARD_DIRECT; + to_add->flags |= LEC_PERMANENT_FLAG; + to_add->vcc = vcc; + to_add->old_push = vcc->push; + vcc->push = lec_push; + priv->mcast_vcc = vcc; + lec_arp_add(priv, to_add); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return err; + return err; } -static void -lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) +static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct lec_arp_table *entry, *next; - int i; + struct hlist_node *node, *next; + struct lec_arp_table *entry; + int i; + + DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); + dump_arp_table(priv); - DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci); - dump_arp_table(priv); spin_lock_irqsave(&priv->lec_arp_lock, flags); - for(i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for(entry = priv->lec_arp_tables[i];entry; entry=next) { - next = entry->next; - if (vcc == entry->vcc) { - lec_arp_remove(priv, entry); - kfree(entry); - if (priv->mcast_vcc == vcc) { - priv->mcast_vcc = NULL; - } - } - } - } - - entry = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->vcc == vcc) { /* leave it out from the list */ - lec_arp_clear_vccs(entry); - del_timer(&entry->timer); - kfree(entry); - } - else { /* put it back to the list */ - entry->next = priv->lec_arp_empty_ones; - priv->lec_arp_empty_ones = entry; - } - entry = next; - } - - entry = priv->lec_no_forward; - priv->lec_no_forward = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - del_timer(&entry->timer); - kfree(entry); - } - else { - entry->next = priv->lec_no_forward; - priv->lec_no_forward = entry; - } - entry = next; - } - - entry = priv->mcast_fwds; - priv->mcast_fwds = NULL; - while (entry != NULL) { - next = entry->next; - if (entry->recv_vcc == vcc) { - lec_arp_clear_vccs(entry); - /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ - kfree(entry); - } - else { - entry->next = priv->mcast_fwds; - priv->mcast_fwds = entry; - } - entry = next; - } + + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { + if (vcc == entry->vcc) { + lec_arp_remove(priv, entry); + lec_arp_put(entry); + if (priv->mcast_vcc == vcc) { + priv->mcast_vcc = NULL; + } + } + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + if (entry->vcc == vcc) { + lec_arp_clear_vccs(entry); + del_timer(&entry->timer); + hlist_del(&entry->next); + lec_arp_put(entry); + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->lec_no_forward, next) { + if (entry->recv_vcc == vcc) { + lec_arp_clear_vccs(entry); + del_timer(&entry->timer); + hlist_del(&entry->next); + lec_arp_put(entry); + } + } + + hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + if (entry->recv_vcc == vcc) { + lec_arp_clear_vccs(entry); + /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ + hlist_del(&entry->next); + lec_arp_put(entry); + } + } spin_unlock_irqrestore(&priv->lec_arp_lock, flags); dump_arp_table(priv); @@ -2519,57 +2483,42 @@ lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) static void lec_arp_check_empties(struct lec_priv *priv, - struct atm_vcc *vcc, struct sk_buff *skb) + struct atm_vcc *vcc, struct sk_buff *skb) { - unsigned long flags; - struct lec_arp_table *entry, *prev; - struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; - unsigned char *src; + unsigned long flags; + struct hlist_node *node, *next; + struct lec_arp_table *entry, *tmp; + struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; + unsigned char *src; #ifdef CONFIG_TR - struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data; + struct lecdatahdr_8025 *tr_hdr = (struct lecdatahdr_8025 *)skb->data; - if (priv->is_trdev) src = tr_hdr->h_source; - else + if (priv->is_trdev) + src = tr_hdr->h_source; + else #endif - src = hdr->h_source; + src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - entry = priv->lec_arp_empty_ones; - if (vcc == entry->vcc) { - del_timer(&entry->timer); - memcpy(entry->mac_addr, src, ETH_ALEN); - entry->status = ESI_FORWARD_DIRECT; - entry->last_used = jiffies; - priv->lec_arp_empty_ones = entry->next; - /* We might have got an entry */ - if ((prev = lec_arp_find(priv,src))) { - lec_arp_remove(priv, prev); - kfree(prev); - } - lec_arp_add(priv, entry); - goto out; - } - prev = entry; - entry = entry->next; - while (entry && entry->vcc != vcc) { - prev= entry; - entry = entry->next; - } - if (!entry) { - DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); - goto out; - } - del_timer(&entry->timer); - memcpy(entry->mac_addr, src, ETH_ALEN); - entry->status = ESI_FORWARD_DIRECT; - entry->last_used = jiffies; - prev->next = entry->next; - if ((prev = lec_arp_find(priv, src))) { - lec_arp_remove(priv, prev); - kfree(prev); - } - lec_arp_add(priv, entry); + hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_empty_ones, next) { + if (vcc == entry->vcc) { + del_timer(&entry->timer); + memcpy(entry->mac_addr, src, ETH_ALEN); + entry->status = ESI_FORWARD_DIRECT; + entry->last_used = jiffies; + /* We might have got an entry */ + if ((tmp = lec_arp_find(priv, src))) { + lec_arp_remove(priv, tmp); + lec_arp_put(tmp); + } + hlist_del(&entry->next); + lec_arp_add(priv, entry); + goto out; + } + } + DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } + MODULE_LICENSE("GPL"); diff --git a/net/atm/lec.h b/net/atm/lec.h index 6606082b29a8..8ac6b7321635 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -1,9 +1,7 @@ /* - * * Lan Emulation client header file * - * Marko Kiiskila mkiiskila@yahoo.com - * + * Marko Kiiskila <mkiiskila@yahoo.com> */ #ifndef _LEC_H_ @@ -17,18 +15,18 @@ #define LEC_HEADER_LEN 16 struct lecdatahdr_8023 { - unsigned short le_header; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; - unsigned short h_type; + unsigned short le_header; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_type; }; struct lecdatahdr_8025 { - unsigned short le_header; - unsigned char ac_pad; - unsigned char fc; - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + unsigned short le_header; + unsigned char ac_pad; + unsigned char fc; + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; }; #define LEC_MINIMUM_8023_SIZE 62 @@ -45,17 +43,18 @@ struct lecdatahdr_8025 { * */ struct lane2_ops { - int (*resolve)(struct net_device *dev, u8 *dst_mac, int force, - u8 **tlvs, u32 *sizeoftlvs); - int (*associate_req)(struct net_device *dev, u8 *lan_dst, - u8 *tlvs, u32 sizeoftlvs); - void (*associate_indicator)(struct net_device *dev, u8 *mac_addr, - u8 *tlvs, u32 sizeoftlvs); + int (*resolve) (struct net_device *dev, u8 *dst_mac, int force, + u8 **tlvs, u32 *sizeoftlvs); + int (*associate_req) (struct net_device *dev, u8 *lan_dst, + u8 *tlvs, u32 sizeoftlvs); + void (*associate_indicator) (struct net_device *dev, u8 *mac_addr, + u8 *tlvs, u32 sizeoftlvs); }; /* * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType * frames. + * * 1. Dix Ethernet EtherType frames encoded by placing EtherType * field in h_type field. Data follows immediatelly after header. * 2. LLC Data frames whose total length, including LLC field and data, @@ -71,72 +70,88 @@ struct lane2_ops { #define LEC_ARP_TABLE_SIZE 16 struct lec_priv { - struct net_device_stats stats; - unsigned short lecid; /* Lecid of this client */ - struct lec_arp_table *lec_arp_empty_ones; - /* Used for storing VCC's that don't have a MAC address attached yet */ - struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE]; - /* Actual LE ARP table */ - struct lec_arp_table *lec_no_forward; - /* Used for storing VCC's (and forward packets from) which are to - age out by not using them to forward packets. - This is because to some LE clients there will be 2 VCCs. Only - one of them gets used. */ - struct lec_arp_table *mcast_fwds; - /* With LANEv2 it is possible that BUS (or a special multicast server) - establishes multiple Multicast Forward VCCs to us. This list - collects all those VCCs. LANEv1 client has only one item in this - list. These entries are not aged out. */ - spinlock_t lec_arp_lock; - struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ - struct atm_vcc *lecd; - struct timer_list lec_arp_timer; - /* C10 */ - unsigned int maximum_unknown_frame_count; -/* Within the period of time defined by this variable, the client will send - no more than C10 frames to BUS for a given unicast destination. (C11) */ - unsigned long max_unknown_frame_time; -/* If no traffic has been sent in this vcc for this period of time, - vcc will be torn down (C12)*/ - unsigned long vcc_timeout_period; -/* An LE Client MUST not retry an LE_ARP_REQUEST for a - given frame's LAN Destination more than maximum retry count times, - after the first LEC_ARP_REQUEST (C13)*/ - unsigned short max_retry_count; -/* Max time the client will maintain an entry in its arp cache in - absence of a verification of that relationship (C17)*/ - unsigned long aging_time; -/* Max time the client will maintain an entry in cache when - topology change flag is true (C18) */ - unsigned long forward_delay_time; -/* Topology change flag (C19)*/ - int topology_change; -/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE - cycle to take (C20)*/ - unsigned long arp_response_time; -/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the - LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/ - unsigned long flush_timeout; -/* The time since sending a frame to the bus after which the - LE Client may assume that the frame has been either discarded or - delivered to the recipient (C22) */ - unsigned long path_switching_delay; + struct net_device_stats stats; + unsigned short lecid; /* Lecid of this client */ + struct hlist_head lec_arp_empty_ones; + /* Used for storing VCC's that don't have a MAC address attached yet */ + struct hlist_head lec_arp_tables[LEC_ARP_TABLE_SIZE]; + /* Actual LE ARP table */ + struct hlist_head lec_no_forward; + /* + * Used for storing VCC's (and forward packets from) which are to + * age out by not using them to forward packets. + * This is because to some LE clients there will be 2 VCCs. Only + * one of them gets used. + */ + struct hlist_head mcast_fwds; + /* + * With LANEv2 it is possible that BUS (or a special multicast server) + * establishes multiple Multicast Forward VCCs to us. This list + * collects all those VCCs. LANEv1 client has only one item in this + * list. These entries are not aged out. + */ + spinlock_t lec_arp_lock; + struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ + struct atm_vcc *lecd; + struct work_struct lec_arp_work; /* C10 */ + unsigned int maximum_unknown_frame_count; + /* + * Within the period of time defined by this variable, the client will send + * no more than C10 frames to BUS for a given unicast destination. (C11) + */ + unsigned long max_unknown_frame_time; + /* + * If no traffic has been sent in this vcc for this period of time, + * vcc will be torn down (C12) + */ + unsigned long vcc_timeout_period; + /* + * An LE Client MUST not retry an LE_ARP_REQUEST for a + * given frame's LAN Destination more than maximum retry count times, + * after the first LEC_ARP_REQUEST (C13) + */ + unsigned short max_retry_count; + /* + * Max time the client will maintain an entry in its arp cache in + * absence of a verification of that relationship (C17) + */ + unsigned long aging_time; + /* + * Max time the client will maintain an entry in cache when + * topology change flag is true (C18) + */ + unsigned long forward_delay_time; /* Topology change flag (C19) */ + int topology_change; + /* + * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE + * cycle to take (C20) + */ + unsigned long arp_response_time; + /* + * Time limit ot wait to receive an LE_FLUSH_RESPONSE after the + * LE_FLUSH_REQUEST has been sent before taking recover action. (C21) + */ + unsigned long flush_timeout; + /* The time since sending a frame to the bus after which the + * LE Client may assume that the frame has been either discarded or + * delivered to the recipient (C22) + */ + unsigned long path_switching_delay; - u8 *tlvs; /* LANE2: TLVs are new */ - u32 sizeoftlvs; /* The size of the tlv array in bytes */ - int lane_version; /* LANE2 */ - int itfnum; /* e.g. 2 for lec2, 5 for lec5 */ - struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ - int is_proxy; /* bridge between ATM and Ethernet */ - int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ + u8 *tlvs; /* LANE2: TLVs are new */ + u32 sizeoftlvs; /* The size of the tlv array in bytes */ + int lane_version; /* LANE2 */ + int itfnum; /* e.g. 2 for lec2, 5 for lec5 */ + struct lane2_ops *lane2_ops; /* can be NULL for LANE v1 */ + int is_proxy; /* bridge between ATM and Ethernet */ + int is_trdev; /* Device type, 0 = Ethernet, 1 = TokenRing */ }; struct lec_vcc_priv { - void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); + void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb); int xoff; }; #define LEC_VCC_PRIV(vcc) ((struct lec_vcc_priv *)((vcc)->user_back)) -#endif /* _LEC_H_ */ - +#endif /* _LEC_H_ */ diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h index 397448094648..ec67435a40a6 100644 --- a/net/atm/lec_arpc.h +++ b/net/atm/lec_arpc.h @@ -1,92 +1,96 @@ /* * Lec arp cache - * Marko Kiiskila mkiiskila@yahoo.com * + * Marko Kiiskila <mkiiskila@yahoo.com> */ -#ifndef _LEC_ARP_H -#define _LEC_ARP_H +#ifndef _LEC_ARP_H_ +#define _LEC_ARP_H_ #include <linux/atm.h> #include <linux/atmdev.h> #include <linux/if_ether.h> #include <linux/atmlec.h> struct lec_arp_table { - struct lec_arp_table *next; /* Linked entry list */ - unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ - unsigned char mac_addr[ETH_ALEN]; /* Mac address */ - int is_rdesc; /* Mac address is a route descriptor */ - struct atm_vcc *vcc; /* Vcc this entry is attached */ - struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb); - /* Push that leads to daemon */ - void (*old_close)(struct atm_vcc *vcc); - /* We want to see when this - * vcc gets closed */ - unsigned long last_used; /* For expiry */ - unsigned long timestamp; /* Used for various timestamping - * things: - * 1. FLUSH started - * (status=ESI_FLUSH_PENDING) - * 2. Counting to - * max_unknown_frame_time - * (status=ESI_ARP_PENDING|| - * status=ESI_VC_PENDING) - */ - unsigned char no_tries; /* No of times arp retry has been - tried */ - unsigned char status; /* Status of this entry */ - unsigned short flags; /* Flags for this entry */ - unsigned short packets_flooded; /* Data packets flooded */ - unsigned long flush_tran_id; /* Transaction id in flush protocol */ - struct timer_list timer; /* Arping timer */ - struct lec_priv *priv; /* Pointer back */ + struct hlist_node next; /* Linked entry list */ + unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */ + unsigned char mac_addr[ETH_ALEN]; /* Mac address */ + int is_rdesc; /* Mac address is a route descriptor */ + struct atm_vcc *vcc; /* Vcc this entry is attached */ + struct atm_vcc *recv_vcc; /* Vcc we receive data from */ - u8 *tlvs; /* LANE2: Each MAC address can have TLVs */ - u32 sizeoftlvs; /* associated with it. sizeoftlvs tells the */ - /* the length of the tlvs array */ - struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ + void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb); + /* Push that leads to daemon */ + + unsigned long last_used; /* For expiry */ + unsigned long timestamp; /* Used for various timestamping things: + * 1. FLUSH started + * (status=ESI_FLUSH_PENDING) + * 2. Counting to + * max_unknown_frame_time + * (status=ESI_ARP_PENDING|| + * status=ESI_VC_PENDING) + */ + unsigned char no_tries; /* No of times arp retry has been tried */ + unsigned char status; /* Status of this entry */ + unsigned short flags; /* Flags for this entry */ + unsigned short packets_flooded; /* Data packets flooded */ + unsigned long flush_tran_id; /* Transaction id in flush protocol */ + struct timer_list timer; /* Arping timer */ + struct lec_priv *priv; /* Pointer back */ + u8 *tlvs; + u32 sizeoftlvs; /* + * LANE2: Each MAC address can have TLVs + * associated with it. sizeoftlvs tells the + * the length of the tlvs array + */ + struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ + atomic_t usage; /* usage count */ }; -struct tlv { /* LANE2: Template tlv struct for accessing */ - /* the tlvs in the lec_arp_table->tlvs array*/ - u32 type; - u8 length; - u8 value[255]; +/* + * LANE2: Template tlv struct for accessing + * the tlvs in the lec_arp_table->tlvs array + */ +struct tlv { + u32 type; + u8 length; + u8 value[255]; }; /* Status fields */ -#define ESI_UNKNOWN 0 /* - * Next packet sent to this mac address - * causes ARP-request to be sent - */ -#define ESI_ARP_PENDING 1 /* - * There is no ATM address associated with this - * 48-bit address. The LE-ARP protocol is in - * progress. - */ -#define ESI_VC_PENDING 2 /* - * There is a valid ATM address associated with - * this 48-bit address but there is no VC set - * up to that ATM address. The signaling - * protocol is in process. - */ -#define ESI_FLUSH_PENDING 4 /* - * The LEC has been notified of the FLUSH_START - * status and it is assumed that the flush - * protocol is in process. - */ -#define ESI_FORWARD_DIRECT 5 /* - * Either the Path Switching Delay (C22) has - * elapsed or the LEC has notified the Mapping - * that the flush protocol has completed. In - * either case, it is safe to forward packets - * to this address via the data direct VC. - */ +#define ESI_UNKNOWN 0 /* + * Next packet sent to this mac address + * causes ARP-request to be sent + */ +#define ESI_ARP_PENDING 1 /* + * There is no ATM address associated with this + * 48-bit address. The LE-ARP protocol is in + * progress. + */ +#define ESI_VC_PENDING 2 /* + * There is a valid ATM address associated with + * this 48-bit address but there is no VC set + * up to that ATM address. The signaling + * protocol is in process. + */ +#define ESI_FLUSH_PENDING 4 /* + * The LEC has been notified of the FLUSH_START + * status and it is assumed that the flush + * protocol is in process. + */ +#define ESI_FORWARD_DIRECT 5 /* + * Either the Path Switching Delay (C22) has + * elapsed or the LEC has notified the Mapping + * that the flush protocol has completed. In + * either case, it is safe to forward packets + * to this address via the data direct VC. + */ /* Flag values */ #define LEC_REMOTE_FLAG 0x0001 #define LEC_PERMANENT_FLAG 0x0002 -#endif +#endif /* _LEC_ARP_H_ */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index a48a5d580408..0d2b994af511 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -25,7 +25,6 @@ #include <linux/atmlec.h> #include <linux/atmmpc.h> /* Modular too */ -#include <linux/config.h> #include <linux/module.h> #include "lec.h" @@ -99,11 +98,6 @@ static struct notifier_block mpoa_notifier = { 0 }; -#ifdef CONFIG_PROC_FS -extern int mpc_proc_init(void); -extern void mpc_proc_clean(void); -#endif - struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; static DEFINE_TIMER(mpc_timer, NULL, 0, 0); @@ -259,10 +253,9 @@ static struct mpoa_client *alloc_mpc(void) { struct mpoa_client *mpc; - mpc = kmalloc(sizeof (struct mpoa_client), GFP_KERNEL); + mpc = kzalloc(sizeof (struct mpoa_client), GFP_KERNEL); if (mpc == NULL) return NULL; - memset(mpc, 0, sizeof(struct mpoa_client)); rwlock_init(&mpc->ingress_lock); rwlock_init(&mpc->egress_lock); mpc->next = mpcs; @@ -567,7 +560,6 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) struct atmmpc_ioc ioc_data; in_cache_entry *in_entry; uint32_t ipaddr; - unsigned char *ip; bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmmpc_ioc)); if (bytes_left != 0) { @@ -590,9 +582,8 @@ static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) if (in_entry != NULL) mpc->in_ops->put(in_entry); return -EINVAL; } - ip = (unsigned char*)&in_entry->ctrl_info.in_dst_ip; printk("mpoa: (%s) mpc_vcc_attach: attaching ingress SVC, entry = %u.%u.%u.%u\n", - mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); + mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip)); in_entry->shortcut = vcc; mpc->in_ops->put(in_entry); } else { @@ -623,10 +614,8 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev) dprintk("mpoa: (%s) mpc_vcc_close:\n", dev->name); in_entry = mpc->in_ops->get_by_vcc(vcc, mpc); if (in_entry) { - unsigned char *ip __attribute__ ((unused)) = - (unsigned char *)&in_entry->ctrl_info.in_dst_ip; dprintk("mpoa: (%s) mpc_vcc_close: ingress SVC closed ip = %u.%u.%u.%u\n", - mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); + mpc->dev->name, NIPQUAD(in_entry->ctrl_info.in_dst_ip)); in_entry->shortcut = NULL; mpc->in_ops->put(in_entry); } @@ -1113,10 +1102,9 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - unsigned char *ip; - uint32_t dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); + dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); if(entry == NULL){ @@ -1162,18 +1150,17 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc) { uint32_t dst_ip = msg->content.in_info.in_dst_ip; uint32_t mask = msg->ip_mask; - unsigned char *ip = (unsigned char *)&dst_ip; in_cache_entry *entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask); if(entry == NULL){ printk("mpoa: (%s) ingress_purge_rcvd: purge for a non-existing entry, ", mpc->dev->name); - printk("ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]); + printk("ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip)); return; } do { dprintk("mpoa: (%s) ingress_purge_rcvd: removing an ingress entry, ip = %u.%u.%u.%u\n" , - mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); + mpc->dev->name, NIPQUAD(dst_ip)); write_lock_bh(&mpc->ingress_lock); mpc->in_ops->remove_entry(entry, mpc); write_unlock_bh(&mpc->ingress_lock); @@ -1442,12 +1429,8 @@ static __init int atm_mpoa_init(void) { register_atm_ioctl(&atm_ioctl_ops); -#ifdef CONFIG_PROC_FS if (mpc_proc_init() != 0) printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); - else - printk(KERN_INFO "mpoa: /proc/mpoa initialized\n"); -#endif printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); @@ -1460,9 +1443,7 @@ static void __exit atm_mpoa_cleanup(void) struct atm_mpoa_qos *qos, *nextqos; struct lec_priv *priv; -#ifdef CONFIG_PROC_FS mpc_proc_clean(); -#endif del_timer(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 863ddf6079e1..3c7981a229e8 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); struct seq_file; void atm_mpoa_disp_qos(struct seq_file *m); +#ifdef CONFIG_PROC_FS +int mpc_proc_init(void); +void mpc_proc_clean(void); +#else +#define mpc_proc_init() (0) +#define mpc_proc_clean() do { } while(0) +#endif + #endif /* _MPC_H_ */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 781ed1b9329d..fbf13cdcf46e 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -87,7 +87,6 @@ static in_cache_entry *in_cache_get_by_vcc(struct atm_vcc *vcc, static in_cache_entry *in_cache_add_entry(uint32_t dst_ip, struct mpoa_client *client) { - unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip; in_cache_entry* entry = kmalloc(sizeof(in_cache_entry), GFP_KERNEL); if (entry == NULL) { @@ -95,7 +94,7 @@ static in_cache_entry *in_cache_add_entry(uint32_t dst_ip, return NULL; } - dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", ip[0], ip[1], ip[2], ip[3]); + dprintk("mpoa: mpoa_caches.c: adding an ingress entry, ip = %u.%u.%u.%u\n", NIPQUAD(dst_ip)); memset(entry,0,sizeof(in_cache_entry)); atomic_set(&entry->use, 1); @@ -152,10 +151,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) if( entry->count > mpc->parameters.mpc_p1 && entry->entry_state == INGRESS_INVALID){ - unsigned char *ip __attribute__ ((unused)) = - (unsigned char *)&entry->ctrl_info.in_dst_ip; - - dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, ip[0], ip[1], ip[2], ip[3]); + dprintk("mpoa: (%s) mpoa_caches.c: threshold exceeded for ip %u.%u.%u.%u, sending MPOA res req\n", mpc->dev->name, NIPQUAD(entry->ctrl_info.in_dst_ip)); entry->entry_state = INGRESS_RESOLVING; msg.type = SND_MPOA_RES_RQST; memcpy(msg.MPS_ctrl, mpc->mps_ctrl_addr, ATM_ESA_LEN ); @@ -187,11 +183,9 @@ static void in_cache_remove_entry(in_cache_entry *entry, { struct atm_vcc *vcc; struct k_message msg; - unsigned char *ip; vcc = entry->shortcut; - ip = (unsigned char *)&entry->ctrl_info.in_dst_ip; - dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",ip[0], ip[1], ip[2], ip[3]); + dprintk("mpoa: mpoa_caches.c: removing an ingress entry, ip = %u.%u.%u.%u\n",NIPQUAD(entry->ctrl_info.in_dst_ip)); if (entry->prev != NULL) entry->prev->next = entry->next; diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 60834b5a14d6..d37b8911b3ab 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #ifdef CONFIG_PROC_FS #include <linux/errno.h> diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 1489067c1e84..19d5dfc0702f 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -34,7 +34,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/atm.h> @@ -288,10 +287,9 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) if (be.encaps != PPPOATM_ENCAPS_AUTODETECT && be.encaps != PPPOATM_ENCAPS_VC && be.encaps != PPPOATM_ENCAPS_LLC) return -EINVAL; - pvcc = kmalloc(sizeof(*pvcc), GFP_KERNEL); + pvcc = kzalloc(sizeof(*pvcc), GFP_KERNEL); if (pvcc == NULL) return -ENOMEM; - memset(pvcc, 0, sizeof(*pvcc)); pvcc->atmvcc = atmvcc; pvcc->old_push = atmvcc->push; pvcc->old_pop = atmvcc->pop; diff --git a/net/atm/proc.c b/net/atm/proc.c index 4041054e5282..91fe5f53ff11 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -8,7 +8,6 @@ * the reader. */ -#include <linux/config.h> #include <linux/module.h> /* for EXPORT_SYMBOL */ #include <linux/string.h> #include <linux/types.h> @@ -508,7 +507,7 @@ err_out: goto out; } -void __exit atm_proc_exit(void) +void atm_proc_exit(void) { atm_proc_dirs_remove(); } diff --git a/net/atm/pvc.c b/net/atm/pvc.c index f2c541774dcd..b2148b43a426 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -3,7 +3,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/net.h> /* struct socket, struct proto_ops */ #include <linux/atm.h> /* ATM stuff */ #include <linux/atmdev.h> /* ATM devices */ diff --git a/net/atm/resources.c b/net/atm/resources.c index 18ac80698f83..529f7e64aa2c 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -8,7 +8,6 @@ * use the default destruct function initialized by sock_init_data */ -#include <linux/config.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/atmdev.h> @@ -34,10 +33,9 @@ static struct atm_dev *__alloc_atm_dev(const char *type) { struct atm_dev *dev; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; - memset(dev, 0, sizeof(*dev)); dev->type = type; dev->signal = ATM_PHY_SIG_UNKNOWN; dev->link_rate = ATM_OC3_PCR; @@ -114,14 +112,27 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, printk(KERN_ERR "atm_dev_register: " "atm_proc_dev_register failed for dev %s\n", type); - mutex_unlock(&atm_dev_mutex); - kfree(dev); - return NULL; + goto out_fail; + } + + if (atm_register_sysfs(dev) < 0) { + printk(KERN_ERR "atm_dev_register: " + "atm_register_sysfs failed for dev %s\n", + type); + atm_proc_dev_deregister(dev); + goto out_fail; } + list_add_tail(&dev->dev_list, &atm_devs); - mutex_unlock(&atm_dev_mutex); +out: + mutex_unlock(&atm_dev_mutex); return dev; + +out_fail: + kfree(dev); + dev = NULL; + goto out; } @@ -140,6 +151,7 @@ void atm_dev_deregister(struct atm_dev *dev) mutex_unlock(&atm_dev_mutex); atm_dev_release_vccs(dev); + atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); atm_dev_put(dev); diff --git a/net/atm/resources.h b/net/atm/resources.h index ac7222fee7a8..1d004aaaeec1 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -6,7 +6,6 @@ #ifndef NET_ATM_RESOURCES_H #define NET_ATM_RESOURCES_H -#include <linux/config.h> #include <linux/atmdev.h> #include <linux/mutex.h> @@ -43,4 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev) #endif /* CONFIG_PROC_FS */ +int atm_register_sysfs(struct atm_dev *adev); +void atm_unregister_sysfs(struct atm_dev *adev); #endif diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a2e0dd047e9f..000695c48583 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -13,7 +13,6 @@ * Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl) * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/errno.h> @@ -146,7 +145,7 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, ax25_cb *s; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; @@ -155,12 +154,12 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, /* If device is null we match any device */ if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) { sock_hold(s->sk); - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return s->sk; } } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return NULL; } @@ -175,7 +174,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, ax25_cb *s; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && @@ -186,7 +185,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return sk; } @@ -236,7 +235,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) struct sk_buff *copy; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && @@ -249,7 +248,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) kfree_skb(copy); } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); } /* @@ -487,10 +486,9 @@ ax25_cb *ax25_create_cb(void) { ax25_cb *ax25; - if ((ax25 = kmalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL) + if ((ax25 = kzalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL) return NULL; - memset(ax25, 0x00, sizeof(*ax25)); atomic_set(&ax25->refcount, 1); skb_queue_head_init(&ax25->write_queue); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index dab77efe34a6..b787678220ff 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -56,15 +55,13 @@ void ax25_dev_device_up(struct net_device *dev) { ax25_dev *ax25_dev; - if ((ax25_dev = kmalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) { + if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) { printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); return; } ax25_unregister_sysctl(); - memset(ax25_dev, 0x00, sizeof(*ax25_dev)); - dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold(dev); diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 1d4ab641f82b..4d22d4430ec8 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -80,7 +80,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_start_t3timer(ax25); ax25_ds_set_timer(ax25->ax25_dev); - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25o, node, &ax25_list) { if (ax25o == ax25) continue; @@ -106,7 +106,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) if (ax25o->state != AX25_STATE_0) ax25_start_t3timer(ax25o); } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); } void ax25_ds_establish_data_link(ax25_cb *ax25) @@ -162,13 +162,13 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) int res = 0; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25, node, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return res; } diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 5961459935eb..4f44185955c7 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -85,7 +85,7 @@ static void ax25_ds_timeout(unsigned long arg) return; } - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25, node, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; @@ -93,7 +93,7 @@ static void ax25_ds_timeout(unsigned long arg) ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_disconnect(ax25, ETIMEDOUT); } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); ax25_dev_dama_off(ax25_dev); } diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 3bb152710b77..07ac0207eb69 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -67,10 +66,10 @@ int ax25_protocol_register(unsigned int pid, protocol->pid = pid; protocol->func = func; - write_lock(&protocol_list_lock); + write_lock_bh(&protocol_list_lock); protocol->next = protocol_list; protocol_list = protocol; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); return 1; } @@ -81,16 +80,16 @@ void ax25_protocol_release(unsigned int pid) { struct protocol_struct *s, *protocol; - write_lock(&protocol_list_lock); + write_lock_bh(&protocol_list_lock); protocol = protocol_list; if (protocol == NULL) { - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); return; } if (protocol->pid == pid) { protocol_list = protocol->next; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); kfree(protocol); return; } @@ -99,14 +98,14 @@ void ax25_protocol_release(unsigned int pid) if (protocol->next->pid == pid) { s = protocol->next; protocol->next = protocol->next->next; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); kfree(s); return; } protocol = protocol->next; } - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); } EXPORT_SYMBOL(ax25_protocol_release); @@ -267,13 +266,13 @@ int ax25_protocol_is_registered(unsigned int pid) struct protocol_struct *protocol; int res = 0; - read_lock(&protocol_list_lock); + read_lock_bh(&protocol_list_lock); for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) if (protocol->pid == pid) { res = 1; break; } - read_unlock(&protocol_list_lock); + read_unlock_bh(&protocol_list_lock); return res; } diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 4cf87540fb3a..e9d94291581e 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -102,8 +102,8 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb) int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) { int (*func)(struct sk_buff *, ax25_cb *); - volatile int queued = 0; unsigned char pid; + int queued = 0; if (skb == NULL) return 0; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index a0b534f80f17..136c3aefa9de 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -104,11 +103,13 @@ int ax25_rebuild_header(struct sk_buff *skb) { struct sk_buff *ourskb; unsigned char *bp = skb->data; - struct net_device *dev; + ax25_route *route; + struct net_device *dev = NULL; ax25_address *src, *dst; + ax25_digi *digipeat = NULL; ax25_dev *ax25_dev; - ax25_route _route, *route = &_route; ax25_cb *ax25; + char ip_mode = ' '; dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); @@ -116,8 +117,12 @@ int ax25_rebuild_header(struct sk_buff *skb) if (arp_find(bp + 1, skb)) return 1; - route = ax25_rt_find_route(route, dst, NULL); - dev = route->dev; + route = ax25_get_route(dst, NULL); + if (route) { + digipeat = route->digipeat; + dev = route->dev; + ip_mode = route->ip_mode; + }; if (dev == NULL) dev = skb->dev; @@ -127,7 +132,7 @@ int ax25_rebuild_header(struct sk_buff *skb) } if (bp[16] == AX25_P_IP) { - if (route->ip_mode == 'V' || (route->ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) { + if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) { /* * We copy the buffer and release the original thereby * keeping it straight @@ -173,7 +178,7 @@ int ax25_rebuild_header(struct sk_buff *skb) ourskb, ax25_dev->values[AX25_VALUES_PACLEN], &src_c, - &dst_c, route->digipeat, dev); + &dst_c, digipeat, dev); if (ax25) { ax25_cb_put(ax25); } @@ -191,7 +196,7 @@ int ax25_rebuild_header(struct sk_buff *skb) skb_pull(skb, AX25_KISS_HEADER_LEN); - if (route->digipeat != NULL) { + if (digipeat != NULL) { if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) { kfree_skb(skb); goto put; @@ -203,7 +208,8 @@ int ax25_rebuild_header(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - ax25_put_route(route); + if (route) + ax25_put_route(route); return 1; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 5d99852b239c..d7736e585336 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -8,7 +8,6 @@ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 5ac98250797b..51b7bdaf27eb 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -41,8 +41,6 @@ static ax25_route *ax25_route_list; static DEFINE_RWLOCK(ax25_route_lock); -static ax25_route *ax25_get_route(ax25_address *, struct net_device *); - void ax25_rt_device_down(struct net_device *dev) { ax25_route *s, *t, *ax25_rt; @@ -115,7 +113,7 @@ static int ax25_rt_add(struct ax25_routes_struct *route) return -ENOMEM; } - atomic_set(&ax25_rt->ref, 0); + atomic_set(&ax25_rt->refcount, 1); ax25_rt->callsign = route->dest_addr; ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; @@ -140,23 +138,10 @@ static int ax25_rt_add(struct ax25_routes_struct *route) return 0; } -static void ax25_rt_destroy(ax25_route *ax25_rt) +void __ax25_put_route(ax25_route *ax25_rt) { - if (atomic_read(&ax25_rt->ref) == 0) { - kfree(ax25_rt->digipeat); - kfree(ax25_rt); - return; - } - - /* - * Uh... Route is still in use; we can't yet destroy it. Retry later. - */ - init_timer(&ax25_rt->timer); - ax25_rt->timer.data = (unsigned long) ax25_rt; - ax25_rt->timer.function = (void *) ax25_rt_destroy; - ax25_rt->timer.expires = jiffies + 5 * HZ; - - add_timer(&ax25_rt->timer); + kfree(ax25_rt->digipeat); + kfree(ax25_rt); } static int ax25_rt_del(struct ax25_routes_struct *route) @@ -177,12 +162,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route) ax25cmp(&route->dest_addr, &s->callsign) == 0) { if (ax25_route_list == s) { ax25_route_list = s->next; - ax25_rt_destroy(s); + ax25_put_route(s); } else { for (t = ax25_route_list; t != NULL; t = t->next) { if (t->next == s) { t->next = s->next; - ax25_rt_destroy(s); + ax25_put_route(s); break; } } @@ -362,7 +347,7 @@ struct file_operations ax25_route_fops = { * * Only routes with a reference count of zero can be destroyed. */ -static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) +ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { ax25_route *ax25_spe_rt = NULL; ax25_route *ax25_def_rt = NULL; @@ -392,7 +377,7 @@ static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_rt = ax25_spe_rt; if (ax25_rt != NULL) - atomic_inc(&ax25_rt->ref); + ax25_hold_route(ax25_rt); read_unlock(&ax25_route_lock); @@ -467,24 +452,6 @@ put: return 0; } -ax25_route *ax25_rt_find_route(ax25_route * route, ax25_address *addr, - struct net_device *dev) -{ - ax25_route *ax25_rt; - - if ((ax25_rt = ax25_get_route(addr, dev))) - return ax25_rt; - - route->next = NULL; - atomic_set(&route->ref, 1); - route->callsign = *addr; - route->dev = dev; - route->digipeat = NULL; - route->ip_mode = ' '; - - return route; -} - struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index ec254057f212..72594867fab6 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -12,7 +12,6 @@ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index bdb64c36df12..867d42537979 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -6,7 +6,6 @@ * * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/spinlock.h> @@ -204,13 +203,11 @@ void ax25_register_sysctl(void) for (ax25_table_size = sizeof(ctl_table), ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) ax25_table_size += sizeof(ctl_table); - if ((ax25_table = kmalloc(ax25_table_size, GFP_ATOMIC)) == NULL) { + if ((ax25_table = kzalloc(ax25_table_size, GFP_ATOMIC)) == NULL) { spin_unlock_bh(&ax25_dev_lock); return; } - memset(ax25_table, 0x00, ax25_table_size); - for (n = 0, ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) { ctl_table *child = kmalloc(sizeof(ax25_param_table), GFP_ATOMIC); if (!child) { diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 469eda0f0dfd..305a099b7477 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -24,7 +24,6 @@ /* Bluetooth address family and sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -49,7 +48,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.8" +#define VERSION "2.10" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -277,7 +276,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { - err = -EAGAIN; + err = -EINPROGRESS; break; } @@ -308,13 +307,21 @@ static struct net_proto_family bt_sock_family_ops = { static int __init bt_init(void) { + int err; + BT_INFO("Core ver %s", VERSION); - sock_register(&bt_sock_family_ops); + err = bt_sysfs_init(); + if (err < 0) + return err; - BT_INFO("HCI device and connection manager initialized"); + err = sock_register(&bt_sock_family_ops); + if (err < 0) { + bt_sysfs_cleanup(); + return err; + } - bt_sysfs_init(); + BT_INFO("HCI device and connection manager initialized"); hci_sock_init(); @@ -325,9 +332,9 @@ static void __exit bt_exit(void) { hci_sock_cleanup(); - bt_sysfs_cleanup(); - sock_unregister(PF_BLUETOOTH); + + bt_sysfs_cleanup(); } subsys_initcall(bt_init); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d908d49dc9f8..2312d050eeed 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -29,7 +29,6 @@ * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> @@ -52,6 +51,7 @@ #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include "bnep.h" @@ -516,6 +516,26 @@ static int bnep_session(void *arg) return 0; } +static struct device *bnep_get_device(struct bnep_session *session) +{ + bdaddr_t *src = &bt_sk(session->sock->sk)->src; + bdaddr_t *dst = &bt_sk(session->sock->sk)->dst; + struct hci_dev *hdev; + struct hci_conn *conn; + + hdev = hci_get_route(dst, src); + if (!hdev) + return NULL; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); + if (!conn) + return NULL; + + hci_dev_put(hdev); + + return &conn->dev; +} + int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) { struct net_device *dev; @@ -535,7 +555,6 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) if (!dev) return -ENOMEM; - down_write(&bnep_session_sem); ss = __bnep_get_session(dst); @@ -552,7 +571,7 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) memcpy(s->eh.h_source, &dst, ETH_ALEN); memcpy(dev->dev_addr, s->eh.h_dest, ETH_ALEN); - s->dev = dev; + s->dev = dev; s->sock = sock; s->role = req->role; s->state = BT_CONNECTED; @@ -569,6 +588,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) bnep_set_default_proto_filter(s); #endif + SET_NETDEV_DEV(dev, bnep_get_device(s)); + err = register_netdev(dev); if (err) { goto failed; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 921204f95f4a..7f7b27db6a8f 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -29,7 +29,6 @@ * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/socket.h> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 2bfe796cf05d..28c55835422a 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -28,7 +28,6 @@ * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index b2e7e38531c6..be04e9fb11f6 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -76,15 +75,13 @@ static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl) { - struct cmtp_application *app = kmalloc(sizeof(*app), GFP_KERNEL); + struct cmtp_application *app = kzalloc(sizeof(*app), GFP_KERNEL); BT_DBG("session %p application %p appl %d", session, app, appl); if (!app) return NULL; - memset(app, 0, sizeof(*app)); - app->state = BT_OPEN; app->appl = appl; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 901eff7ebe74..b81a01c64aea 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -336,10 +335,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) baswap(&src, &bt_sk(sock->sk)->src); baswap(&dst, &bt_sk(sock->sk)->dst); - session = kmalloc(sizeof(struct cmtp_session), GFP_KERNEL); + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; - memset(session, 0, sizeof(struct cmtp_session)); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 8f8fad23f78a..10ad7fd91d83 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f812ed129e58..90e3a285a17e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -24,7 +24,6 @@ /* Bluetooth HCI connection handling. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -85,6 +84,20 @@ static void hci_acl_connect(struct hci_conn *conn) hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp); } +static void hci_acl_connect_cancel(struct hci_conn *conn) +{ + struct hci_cp_create_conn_cancel cp; + + BT_DBG("%p", conn); + + if (conn->hdev->hci_ver < 2) + return; + + bacpy(&cp.bdaddr, &conn->dst); + hci_send_cmd(conn->hdev, OGF_LINK_CTL, + OCF_CREATE_CONN_CANCEL, sizeof(cp), &cp); +} + void hci_acl_disconn(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -95,7 +108,8 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) cp.handle = __cpu_to_le16(conn->handle); cp.reason = reason; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_DISCONNECT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, OGF_LINK_CTL, + OCF_DISCONNECT, sizeof(cp), &cp); } void hci_add_sco(struct hci_conn *conn, __u16 handle) @@ -116,8 +130,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) static void hci_conn_timeout(unsigned long arg) { - struct hci_conn *conn = (void *)arg; - struct hci_dev *hdev = conn->hdev; + struct hci_conn *conn = (void *) arg; + struct hci_dev *hdev = conn->hdev; BT_DBG("conn %p state %d", conn, conn->state); @@ -125,19 +139,29 @@ static void hci_conn_timeout(unsigned long arg) return; hci_dev_lock(hdev); - if (conn->state == BT_CONNECTED) + + switch (conn->state) { + case BT_CONNECT: + hci_acl_connect_cancel(conn); + break; + case BT_CONNECTED: hci_acl_disconn(conn, 0x13); - else + break; + default: conn->state = BT_CLOSED; + break; + } + hci_dev_unlock(hdev); - return; } -static void hci_conn_init_timer(struct hci_conn *conn) +static void hci_conn_idle(unsigned long arg) { - init_timer(&conn->timer); - conn->timer.function = hci_conn_timeout; - conn->timer.data = (unsigned long)conn; + struct hci_conn *conn = (void *) arg; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + hci_conn_enter_sniff_mode(conn); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) @@ -146,17 +170,27 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (!(conn = kmalloc(sizeof(struct hci_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct hci_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct hci_conn)); bacpy(&conn->dst, dst); - conn->type = type; conn->hdev = hdev; + conn->type = type; + conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; + conn->power_save = 1; + skb_queue_head_init(&conn->data_q); - hci_conn_init_timer(conn); + + init_timer(&conn->disc_timer); + conn->disc_timer.function = hci_conn_timeout; + conn->disc_timer.data = (unsigned long) conn; + + init_timer(&conn->idle_timer); + conn->idle_timer.function = hci_conn_idle; + conn->idle_timer.data = (unsigned long) conn; atomic_set(&conn->refcnt, 0); @@ -168,6 +202,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_add_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; @@ -179,7 +215,9 @@ int hci_conn_del(struct hci_conn *conn) BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); - hci_conn_del_timer(conn); + del_timer(&conn->idle_timer); + + del_timer(&conn->disc_timer); if (conn->type == SCO_LINK) { struct hci_conn *acl = conn->link; @@ -198,6 +236,8 @@ int hci_conn_del(struct hci_conn *conn) tasklet_disable(&hdev->tx_task); + hci_conn_del_sysfs(conn); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); @@ -208,7 +248,9 @@ int hci_conn_del(struct hci_conn *conn) hci_dev_put(hdev); - kfree(conn); + /* will free via device release */ + put_device(&conn->dev); + return 0; } @@ -365,6 +407,70 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) } EXPORT_SYMBOL(hci_conn_switch_role); +/* Enter active mode */ +void hci_conn_enter_active_mode(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + if (test_bit(HCI_RAW, &hdev->flags)) + return; + + if (conn->mode != HCI_CM_SNIFF || !conn->power_save) + goto timer; + + if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + struct hci_cp_exit_sniff_mode cp; + cp.handle = __cpu_to_le16(conn->handle); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp); + } + +timer: + if (hdev->idle_timeout > 0) + mod_timer(&conn->idle_timer, + jiffies + msecs_to_jiffies(hdev->idle_timeout)); +} + +/* Enter sniff mode */ +void hci_conn_enter_sniff_mode(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + if (test_bit(HCI_RAW, &hdev->flags)) + return; + + if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) + return; + + if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF)) + return; + + if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { + struct hci_cp_sniff_subrate cp; + cp.handle = __cpu_to_le16(conn->handle); + cp.max_latency = __constant_cpu_to_le16(0); + cp.min_remote_timeout = __constant_cpu_to_le16(0); + cp.min_local_timeout = __constant_cpu_to_le16(0); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_SNIFF_SUBRATE, sizeof(cp), &cp); + } + + if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + struct hci_cp_sniff_mode cp; + cp.handle = __cpu_to_le16(conn->handle); + cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval); + cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval); + cp.attempt = __constant_cpu_to_le16(4); + cp.timeout = __constant_cpu_to_le16(1); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_SNIFF_MODE, sizeof(cp), &cp); + } +} + /* Drop all connection on the device */ void hci_conn_hash_flush(struct hci_dev *hdev) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a49a6975092d..338ae977a31b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -24,7 +24,6 @@ /* Bluetooth HCI core. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> @@ -207,6 +206,9 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Read Local Supported Features */ hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL); + /* Read Local Version */ + hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION, 0, NULL); + /* Read Buffer Size (ACL mtu, max pkt, etc.) */ hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL); @@ -337,9 +339,8 @@ void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data) if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) { /* Entry not in the cache. Add new one. */ - if (!(e = kmalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) + if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) return; - memset(e, 0, sizeof(struct inquiry_entry)); e->next = cache->list; cache->list = e; } @@ -412,7 +413,7 @@ int hci_inquiry(void __user *arg) } hci_dev_unlock_bh(hdev); - timeo = ir.length * 2 * HZ; + timeo = ir.length * msecs_to_jiffies(2000); if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0) goto done; @@ -480,7 +481,8 @@ int hci_dev_open(__u16 dev) set_bit(HCI_INIT, &hdev->flags); //__hci_request(hdev, hci_reset_req, 0, HZ); - ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_request(hdev, hci_init_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); clear_bit(HCI_INIT, &hdev->flags); } @@ -547,7 +549,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); if (!test_bit(HCI_RAW, &hdev->flags)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, HZ/4); + __hci_request(hdev, hci_reset_req, 0, + msecs_to_jiffies(250)); clear_bit(HCI_INIT, &hdev->flags); } @@ -620,7 +623,8 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_request(hdev, hci_reset_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); done: tasklet_enable(&hdev->tx_task); @@ -658,7 +662,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_request(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_auth_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETENCRYPT: @@ -669,18 +674,19 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_request(hdev, hci_auth_req, - dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_auth_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); if (err) break; } - err = hci_request(hdev, hci_encrypt_req, - dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETSCAN: - err = hci_request(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_scan_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETPTYPE: @@ -796,12 +802,10 @@ struct hci_dev *hci_alloc_dev(void) { struct hci_dev *hdev; - hdev = kmalloc(sizeof(struct hci_dev), GFP_KERNEL); + hdev = kzalloc(sizeof(struct hci_dev), GFP_KERNEL); if (!hdev) return NULL; - memset(hdev, 0, sizeof(struct hci_dev)); - skb_queue_head_init(&hdev->driver_init); return hdev; @@ -813,8 +817,8 @@ void hci_free_dev(struct hci_dev *hdev) { skb_queue_purge(&hdev->driver_init); - /* will free via class release */ - class_device_put(&hdev->class_dev); + /* will free via device release */ + put_device(&hdev->dev); } EXPORT_SYMBOL(hci_free_dev); @@ -849,6 +853,10 @@ int hci_register_dev(struct hci_dev *hdev) hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->link_mode = (HCI_LM_ACCEPT); + hdev->idle_timeout = 0; + hdev->sniff_max_interval = 800; + hdev->sniff_min_interval = 80; + tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev); tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev); tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev); @@ -1221,6 +1229,9 @@ static inline void hci_sched_acl(struct hci_dev *hdev) while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); + + hci_conn_enter_active_mode(conn); + hci_send_frame(skb); hdev->acl_last_tx = jiffies; @@ -1299,6 +1310,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { register struct hci_proto *hp; + hci_conn_enter_active_mode(conn); + /* Send to upper protocol */ if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) { hp->recv_acldata(conn, skb, flags); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index eb64555d1fb3..d43d0c890975 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -24,7 +24,6 @@ /* Bluetooth HCI event handling. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -63,6 +62,7 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb switch (ocf) { case OCF_INQUIRY_CANCEL: + case OCF_EXIT_PERIODIC_INQ: status = *((__u8 *) skb->data); if (status) { @@ -84,6 +84,8 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff * { struct hci_conn *conn; struct hci_rp_role_discovery *rd; + struct hci_rp_write_link_policy *lp; + void *sent; BT_DBG("%s ocf 0x%x", hdev->name, ocf); @@ -107,6 +109,27 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff * hci_dev_unlock(hdev); break; + case OCF_WRITE_LINK_POLICY: + sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY); + if (!sent) + break; + + lp = (struct hci_rp_write_link_policy *) skb->data; + + if (lp->status) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle)); + if (conn) { + __le16 policy = get_unaligned((__le16 *) (sent + 2)); + conn->link_policy = __le16_to_cpu(policy); + } + + hci_dev_unlock(hdev); + break; + default: BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", hdev->name, ocf); @@ -275,15 +298,33 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb /* Command Complete OGF INFO_PARAM */ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) { - struct hci_rp_read_loc_features *lf; + struct hci_rp_read_loc_version *lv; + struct hci_rp_read_local_features *lf; struct hci_rp_read_buffer_size *bs; struct hci_rp_read_bd_addr *ba; BT_DBG("%s ocf 0x%x", hdev->name, ocf); switch (ocf) { + case OCF_READ_LOCAL_VERSION: + lv = (struct hci_rp_read_loc_version *) skb->data; + + if (lv->status) { + BT_DBG("%s READ_LOCAL_VERSION failed %d", hdev->name, lf->status); + break; + } + + hdev->hci_ver = lv->hci_ver; + hdev->hci_rev = btohs(lv->hci_rev); + hdev->manufacturer = btohs(lv->manufacturer); + + BT_DBG("%s: manufacturer %d hci_ver %d hci_rev %d", hdev->name, + hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + + break; + case OCF_READ_LOCAL_FEATURES: - lf = (struct hci_rp_read_loc_features *) skb->data; + lf = (struct hci_rp_read_local_features *) skb->data; if (lf->status) { BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status); @@ -306,7 +347,8 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s if (hdev->features[1] & LMP_HV3) hdev->pkt_type |= (HCI_HV3); - BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]); + BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, + lf->features[0], lf->features[1], lf->features[2]); break; @@ -320,9 +362,17 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s } hdev->acl_mtu = __le16_to_cpu(bs->acl_mtu); - hdev->sco_mtu = bs->sco_mtu ? bs->sco_mtu : 64; - hdev->acl_pkts = hdev->acl_cnt = __le16_to_cpu(bs->acl_max_pkt); - hdev->sco_pkts = hdev->sco_cnt = __le16_to_cpu(bs->sco_max_pkt); + hdev->sco_mtu = bs->sco_mtu; + hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt); + hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt); + + if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + hdev->sco_mtu = 64; + hdev->sco_pkts = 8; + } + + hdev->acl_cnt = hdev->acl_pkts; + hdev->sco_cnt = hdev->sco_pkts; BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name, hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts); @@ -440,8 +490,46 @@ static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status) BT_DBG("%s ocf 0x%x", hdev->name, ocf); switch (ocf) { + case OCF_SNIFF_MODE: + if (status) { + struct hci_conn *conn; + struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE); + + if (!cp) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + } + + hci_dev_unlock(hdev); + } + break; + + case OCF_EXIT_SNIFF_MODE: + if (status) { + struct hci_conn *conn; + struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE); + + if (!cp) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + } + + hci_dev_unlock(hdev); + } + break; + default: - BT_DBG("%s Command status: ogf HOST_POLICY ocf %x", hdev->name, ocf); + BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf); break; } } @@ -623,14 +711,16 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk else cp.role = 0x01; /* Remain slave */ - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); } else { /* Connection rejected */ struct hci_cp_reject_conn_req cp; bacpy(&cp.bdaddr, &ev->bdaddr); cp.reason = 0x0f; - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_REJECT_CONN_REQ, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_REJECT_CONN_REQ, sizeof(cp), &cp); } } @@ -638,7 +728,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data; - struct hci_conn *conn = NULL; + struct hci_conn *conn; BT_DBG("%s", hdev->name); @@ -660,12 +750,21 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (test_bit(HCI_ENCRYPT, &hdev->flags)) conn->link_mode |= HCI_LM_ENCRYPT; + /* Get remote features */ + if (conn->type == ACL_LINK) { + struct hci_cp_read_remote_features cp; + cp.handle = ev->handle; + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp); + } + /* Set link policy */ if (conn->type == ACL_LINK && hdev->link_policy) { struct hci_cp_write_link_policy cp; cp.handle = ev->handle; cp.policy = __cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); } /* Set packet type for incoming connection */ @@ -676,7 +775,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); + } else { + /* Update disconnect timer */ + hci_conn_hold(conn); + hci_conn_put(conn); } } else conn->state = BT_CLOSED; @@ -704,8 +808,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -714,7 +817,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { conn->state = BT_CLOSED; hci_proto_disconn_ind(conn, ev->reason); @@ -771,7 +874,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data; - struct hci_conn *conn = NULL; + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -794,18 +897,43 @@ static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb hci_dev_unlock(hdev); } +/* Mode Change */ +static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + conn->mode = ev->mode; + conn->interval = __le16_to_cpu(ev->interval); + + if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + if (conn->mode == HCI_CM_ACTIVE) + conn->power_save = 1; + else + conn->power_save = 0; + } + } + + hci_dev_unlock(hdev); +} + /* Authentication Complete */ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_AUTH; @@ -820,8 +948,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s cp.handle = __cpu_to_le16(conn->handle); cp.encrypt = 1; hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_SET_CONN_ENCRYPT, - sizeof(cp), &cp); + OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -836,14 +963,13 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) { if (ev->encrypt) @@ -864,14 +990,13 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_SECURE; @@ -899,18 +1024,35 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff { } +/* Remote Features */ +static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn && !ev->status) { + memcpy(conn->features, ev->features, sizeof(conn->features)); + } + + hci_dev_unlock(hdev); +} + /* Clock Offset */ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn && !ev->status) { struct inquiry_entry *ie; @@ -941,6 +1083,23 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } +/* Sniff Subrate */ +static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + } + + hci_dev_unlock(hdev); +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; @@ -989,6 +1148,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_role_change_evt(hdev, skb); break; + case HCI_EV_MODE_CHANGE: + hci_mode_change_evt(hdev, skb); + break; + case HCI_EV_AUTH_COMPLETE: hci_auth_complete_evt(hdev, skb); break; @@ -1013,6 +1176,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_link_key_notify_evt(hdev, skb); break; + case HCI_EV_REMOTE_FEATURES: + hci_remote_features_evt(hdev, skb); + break; + case HCI_EV_CLOCK_OFFSET: hci_clock_offset_evt(hdev, skb); break; @@ -1021,6 +1188,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_pscan_rep_mode_evt(hdev, skb); break; + case HCI_EV_SNIFF_SUBRATE: + hci_sniff_subrate_evt(hdev, skb); + break; + case HCI_EV_CMD_STATUS: cs = (struct hci_ev_cmd_status *) skb->data; skb_pull(skb, sizeof(cs)); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 97bdec73d17e..1a35d343e08a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -24,7 +24,6 @@ /* Bluetooth HCI sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 0ed38740388c..989b22d9042e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,9 +1,10 @@ /* Bluetooth HCI driver model support. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/platform_device.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -12,35 +13,63 @@ #define BT_DBG(D...) #endif -static ssize_t show_name(struct class_device *cdev, char *buf) +static inline char *typetostr(int type) { - struct hci_dev *hdev = class_get_devdata(cdev); - return sprintf(buf, "%s\n", hdev->name); + switch (type) { + case HCI_VIRTUAL: + return "VIRTUAL"; + case HCI_USB: + return "USB"; + case HCI_PCCARD: + return "PCCARD"; + case HCI_UART: + return "UART"; + case HCI_RS232: + return "RS232"; + case HCI_PCI: + return "PCI"; + case HCI_SDIO: + return "SDIO"; + default: + return "UNKNOWN"; + } } -static ssize_t show_type(struct class_device *cdev, char *buf) +static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); - return sprintf(buf, "%d\n", hdev->type); + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", typetostr(hdev->type)); } -static ssize_t show_address(struct class_device *cdev, char *buf) +static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); bdaddr_t bdaddr; baswap(&bdaddr, &hdev->bdaddr); return sprintf(buf, "%s\n", batostr(&bdaddr)); } -static ssize_t show_flags(struct class_device *cdev, char *buf) +static ssize_t show_manufacturer(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->manufacturer); +} + +static ssize_t show_hci_version(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->hci_ver); +} + +static ssize_t show_hci_revision(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); - return sprintf(buf, "0x%lx\n", hdev->flags); + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->hci_rev); } -static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf) +static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; int n = 0; @@ -62,94 +91,268 @@ static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf) return n; } -static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL); -static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); -static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); -static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL); -static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); +static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->idle_timeout); +} -static struct class_device_attribute *bt_attrs[] = { - &class_device_attr_name, - &class_device_attr_type, - &class_device_attr_address, - &class_device_attr_flags, - &class_device_attr_inquiry_cache, - NULL -}; +static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u32 val; + + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; + + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hdev->idle_timeout = val; + + return count; +} -#ifdef CONFIG_HOTPLUG -static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); - int n, i = 0; + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->sniff_max_interval); +} - envp[i++] = buf; - n = snprintf(buf, size, "INTERFACE=%s", hdev->name) + 1; - buf += n; - size -= n; +static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u16 val; - if ((size <= 0) || (i >= num_envp)) - return -ENOMEM; + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; - envp[i] = NULL; - return 0; + if (val < 0x0002 || val > 0xFFFE || val % 2) + return -EINVAL; + + if (val < hdev->sniff_min_interval) + return -EINVAL; + + hdev->sniff_max_interval = val; + + return count; +} + +static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->sniff_min_interval); } -#endif -static void bt_release(struct class_device *cdev) +static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u16 val; + + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; - kfree(hdev); + if (val < 0x0002 || val > 0xFFFE || val % 2) + return -EINVAL; + + if (val > hdev->sniff_max_interval) + return -EINVAL; + + hdev->sniff_min_interval = val; + + return count; } -struct class bt_class = { - .name = "bluetooth", - .release = bt_release, -#ifdef CONFIG_HOTPLUG - .uevent = bt_uevent, -#endif +static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); +static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); +static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); +static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); + +static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR, + show_idle_timeout, store_idle_timeout); +static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR, + show_sniff_max_interval, store_sniff_max_interval); +static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, + show_sniff_min_interval, store_sniff_min_interval); + +static struct device_attribute *bt_attrs[] = { + &dev_attr_type, + &dev_attr_address, + &dev_attr_manufacturer, + &dev_attr_hci_version, + &dev_attr_hci_revision, + &dev_attr_inquiry_cache, + &dev_attr_idle_timeout, + &dev_attr_sniff_max_interval, + &dev_attr_sniff_min_interval, + NULL }; +static ssize_t show_conn_type(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", conn->type == ACL_LINK ? "ACL" : "SCO"); +} + +static ssize_t show_conn_address(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + bdaddr_t bdaddr; + baswap(&bdaddr, &conn->dst); + return sprintf(buf, "%s\n", batostr(&bdaddr)); +} + +#define CONN_ATTR(_name,_mode,_show,_store) \ +struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store) + +static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL); +static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL); + +static struct device_attribute *conn_attrs[] = { + &conn_attr_type, + &conn_attr_address, + NULL +}; + +struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); +static struct bus_type bt_bus = { + .name = "bluetooth", +}; + +static struct platform_device *bt_platform; + +static void bt_release(struct device *dev) +{ + void *data = dev_get_drvdata(dev); + kfree(data); +} + +static void add_conn(void *data) +{ + struct hci_conn *conn = data; + int i; + + device_register(&conn->dev); + + for (i = 0; conn_attrs[i]; i++) + device_create_file(&conn->dev, conn_attrs[i]); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + bdaddr_t *ba = &conn->dst; + + BT_DBG("conn %p", conn); + + conn->dev.parent = &hdev->dev; + conn->dev.release = bt_release; + + snprintf(conn->dev.bus_id, BUS_ID_SIZE, + "%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X", + conn->type == ACL_LINK ? "acl" : "sco", + ba->b[5], ba->b[4], ba->b[3], + ba->b[2], ba->b[1], ba->b[0]); + + dev_set_drvdata(&conn->dev, conn); + + INIT_WORK(&conn->work, add_conn, (void *) conn); + + schedule_work(&conn->work); +} + +static void del_conn(void *data) +{ + struct hci_conn *conn = data; + device_del(&conn->dev); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + INIT_WORK(&conn->work, del_conn, (void *) conn); + + schedule_work(&conn->work); +} + int hci_register_sysfs(struct hci_dev *hdev) { - struct class_device *cdev = &hdev->class_dev; + struct device *dev = &hdev->dev; unsigned int i; int err; BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - cdev->class = &bt_class; - class_set_devdata(cdev, hdev); + dev->class = bt_class; - strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE); - err = class_device_register(cdev); + if (hdev->parent) + dev->parent = hdev->parent; + else + dev->parent = &bt_platform->dev; + + strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + + dev->release = bt_release; + + dev_set_drvdata(dev, hdev); + + err = device_register(dev); if (err < 0) return err; for (i = 0; bt_attrs[i]; i++) - class_device_create_file(cdev, bt_attrs[i]); + device_create_file(dev, bt_attrs[i]); return 0; } void hci_unregister_sysfs(struct hci_dev *hdev) { - struct class_device * cdev = &hdev->class_dev; - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - class_device_del(cdev); + device_del(&hdev->dev); } int __init bt_sysfs_init(void) { - return class_register(&bt_class); + int err; + + bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0); + if (IS_ERR(bt_platform)) + return PTR_ERR(bt_platform); + + err = bus_register(&bt_bus); + if (err < 0) { + platform_device_unregister(bt_platform); + return err; + } + + bt_class = class_create(THIS_MODULE, "bluetooth"); + if (IS_ERR(bt_class)) { + bus_unregister(&bt_bus); + platform_device_unregister(bt_platform); + return PTR_ERR(bt_class); + } + + return 0; } -void __exit bt_sysfs_cleanup(void) +void bt_sysfs_cleanup(void) { - class_unregister(&bt_class); + class_destroy(bt_class); + + bus_unregister(&bt_bus); + + platform_device_unregister(bt_platform); } diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index edfea772fb67..c6abf2a5a932 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,7 +1,6 @@ config BT_HIDP tristate "HIDP protocol support" - depends on BT && BT_L2CAP && (BROKEN || !S390) - select INPUT + depends on BT && BT_L2CAP && INPUT help HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index cdb9cfafd960..03b5dadb4951 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -41,6 +40,7 @@ #include <linux/input.h> #include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include "hidp.h" @@ -529,6 +529,26 @@ static int hidp_session(void *arg) return 0; } +static struct device *hidp_get_device(struct hidp_session *session) +{ + bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src; + bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst; + struct hci_dev *hdev; + struct hci_conn *conn; + + hdev = hci_get_route(dst, src); + if (!hdev) + return NULL; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); + if (!conn) + return NULL; + + hci_dev_put(hdev); + + return &conn->dev; +} + static inline void hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { struct input_dev *input = session->input; @@ -567,6 +587,8 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co input->relbit[0] |= BIT(REL_WHEEL); } + input->cdev.dev = hidp_get_device(session); + input->event = hidp_input_event; input_register_device(input); @@ -583,10 +605,9 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) return -ENOTUNIQ; - session = kmalloc(sizeof(struct hidp_session), GFP_KERNEL); + session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); if (!session) return -ENOMEM; - memset(session, 0, sizeof(struct hidp_session)); session->input = input_allocate_device(); if (!session->input) { diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index b8f67761b886..099646e4e2ef 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f6b4a8085357..d56f60b392ac 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -24,7 +24,6 @@ /* Bluetooth L2CAP core and sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -64,11 +63,6 @@ static struct bt_sock_list l2cap_sk_list = { .lock = RW_LOCK_UNLOCKED }; -static int l2cap_conn_del(struct hci_conn *conn, int err); - -static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent); -static void l2cap_chan_del(struct sock *sk, int err); - static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); static void l2cap_sock_kill(struct sock *sk); @@ -110,24 +104,177 @@ static void l2cap_sock_init_timer(struct sock *sk) sk->sk_timer.data = (unsigned long)sk; } +/* ---- L2CAP channels ---- */ +static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->dcid == cid) + break; + } + return s; +} + +static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->scid == cid) + break; + } + return s; +} + +/* Find channel with given SCID. + * Returns locked socket */ +static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_scid(l, cid); + if (s) bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->ident == ident) + break; + } + return s; +} + +static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_ident(l, ident); + if (s) bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) +{ + u16 cid = 0x0040; + + for (; cid < 0xffff; cid++) { + if(!__l2cap_get_chan_by_scid(l, cid)) + return cid; + } + + return 0; +} + +static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) +{ + sock_hold(sk); + + if (l->head) + l2cap_pi(l->head)->prev_c = sk; + + l2cap_pi(sk)->next_c = l->head; + l2cap_pi(sk)->prev_c = NULL; + l->head = sk; +} + +static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) +{ + struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; + + write_lock_bh(&l->lock); + if (sk == l->head) + l->head = next; + + if (next) + l2cap_pi(next)->prev_c = prev; + if (prev) + l2cap_pi(prev)->next_c = next; + write_unlock_bh(&l->lock); + + __sock_put(sk); +} + +static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) +{ + struct l2cap_chan_list *l = &conn->chan_list; + + BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); + + l2cap_pi(sk)->conn = conn; + + if (sk->sk_type == SOCK_SEQPACKET) { + /* Alloc CID for connection-oriented socket */ + l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + } else if (sk->sk_type == SOCK_DGRAM) { + /* Connectionless socket */ + l2cap_pi(sk)->scid = 0x0002; + l2cap_pi(sk)->dcid = 0x0002; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } else { + /* Raw socket can send/recv signalling messages only */ + l2cap_pi(sk)->scid = 0x0001; + l2cap_pi(sk)->dcid = 0x0001; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } + + __l2cap_chan_link(l, sk); + + if (parent) + bt_accept_enqueue(parent, sk); +} + +/* Delete channel. + * Must be called on the locked socket. */ +static void l2cap_chan_del(struct sock *sk, int err) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sock *parent = bt_sk(sk)->parent; + + l2cap_sock_clear_timer(sk); + + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); + + if (conn) { + /* Unlink from channel list */ + l2cap_chan_unlink(&conn->chan_list, sk); + l2cap_pi(sk)->conn = NULL; + hci_conn_put(conn->hcon); + } + + sk->sk_state = BT_CLOSED; + sock_set_flag(sk, SOCK_ZAPPED); + + if (err) + sk->sk_err = err; + + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent, 0); + } else + sk->sk_state_change(sk); +} + /* ---- L2CAP connections ---- */ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) { - struct l2cap_conn *conn; - - if ((conn = hcon->l2cap_data)) - return conn; + struct l2cap_conn *conn = hcon->l2cap_data; - if (status) + if (conn || status) return conn; - if (!(conn = kmalloc(sizeof(struct l2cap_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct l2cap_conn)); hcon->l2cap_data = conn; conn->hcon = hcon; + BT_DBG("hcon %p conn %p", hcon, conn); + conn->mtu = hcon->hdev->acl_mtu; conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; @@ -135,17 +282,16 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); - BT_DBG("hcon %p conn %p", hcon, conn); return conn; } -static int l2cap_conn_del(struct hci_conn *hcon, int err) +static void l2cap_conn_del(struct hci_conn *hcon, int err) { - struct l2cap_conn *conn; + struct l2cap_conn *conn = hcon->l2cap_data; struct sock *sk; - if (!(conn = hcon->l2cap_data)) - return 0; + if (!conn) + return; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); @@ -162,15 +308,14 @@ static int l2cap_conn_del(struct hci_conn *hcon, int err) hcon->l2cap_data = NULL; kfree(conn); - return 0; } static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) { struct l2cap_chan_list *l = &conn->chan_list; - write_lock(&l->lock); + write_lock_bh(&l->lock); __l2cap_chan_add(conn, sk, parent); - write_unlock(&l->lock); + write_unlock_bh(&l->lock); } static inline u8 l2cap_get_ident(struct l2cap_conn *conn) @@ -183,14 +328,14 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) * 200 - 254 are used by utilities like l2ping, etc. */ - spin_lock(&conn->lock); + spin_lock_bh(&conn->lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; - spin_unlock(&conn->lock); + spin_unlock_bh(&conn->lock); return id; } @@ -926,160 +1071,6 @@ static int l2cap_sock_release(struct socket *sock) return err; } -/* ---- L2CAP channels ---- */ -static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->dcid == cid) - break; - } - return s; -} - -static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->scid == cid) - break; - } - return s; -} - -/* Find channel with given SCID. - * Returns locked socket */ -static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_scid(l, cid); - if (s) bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->ident == ident) - break; - } - return s; -} - -static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_ident(l, ident); - if (s) bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) -{ - u16 cid = 0x0040; - - for (; cid < 0xffff; cid++) { - if(!__l2cap_get_chan_by_scid(l, cid)) - return cid; - } - - return 0; -} - -static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) -{ - sock_hold(sk); - - if (l->head) - l2cap_pi(l->head)->prev_c = sk; - - l2cap_pi(sk)->next_c = l->head; - l2cap_pi(sk)->prev_c = NULL; - l->head = sk; -} - -static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) -{ - struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; - - write_lock(&l->lock); - if (sk == l->head) - l->head = next; - - if (next) - l2cap_pi(next)->prev_c = prev; - if (prev) - l2cap_pi(prev)->next_c = next; - write_unlock(&l->lock); - - __sock_put(sk); -} - -static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) -{ - struct l2cap_chan_list *l = &conn->chan_list; - - BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); - - l2cap_pi(sk)->conn = conn; - - if (sk->sk_type == SOCK_SEQPACKET) { - /* Alloc CID for connection-oriented socket */ - l2cap_pi(sk)->scid = l2cap_alloc_cid(l); - } else if (sk->sk_type == SOCK_DGRAM) { - /* Connectionless socket */ - l2cap_pi(sk)->scid = 0x0002; - l2cap_pi(sk)->dcid = 0x0002; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } else { - /* Raw socket can send/recv signalling messages only */ - l2cap_pi(sk)->scid = 0x0001; - l2cap_pi(sk)->dcid = 0x0001; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } - - __l2cap_chan_link(l, sk); - - if (parent) - bt_accept_enqueue(parent, sk); -} - -/* Delete channel. - * Must be called on the locked socket. */ -static void l2cap_chan_del(struct sock *sk, int err) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sock *parent = bt_sk(sk)->parent; - - l2cap_sock_clear_timer(sk); - - BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - - if (conn) { - /* Unlink from channel list */ - l2cap_chan_unlink(&conn->chan_list, sk); - l2cap_pi(sk)->conn = NULL; - hci_conn_put(conn->hcon); - } - - sk->sk_state = BT_CLOSED; - sock_set_flag(sk, SOCK_ZAPPED); - - if (err) - sk->sk_err = err; - - if (parent) { - bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); - } else - sk->sk_state_change(sk); -} - static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; @@ -1425,11 +1416,11 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd if (!sk) goto response; - write_lock(&list->lock); + write_lock_bh(&list->lock); /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(list, scid)) { - write_unlock(&list->lock); + write_unlock_bh(&list->lock); sock_set_flag(sk, SOCK_ZAPPED); l2cap_sock_kill(sk); goto response; @@ -1467,7 +1458,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd result = status = 0; done: - write_unlock(&list->lock); + write_unlock_bh(&list->lock); response: bh_unlock_sock(parent); @@ -1835,7 +1826,9 @@ drop: kfree_skb(skb); done: - if (sk) bh_unlock_sock(sk); + if (sk) + bh_unlock_sock(sk); + return 0; } @@ -1926,18 +1919,18 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { + struct l2cap_conn *conn; + BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (hcon->type != ACL_LINK) return 0; if (!status) { - struct l2cap_conn *conn; - conn = l2cap_conn_add(hcon, status); if (conn) l2cap_conn_ready(conn); - } else + } else l2cap_conn_del(hcon, bt_err(status)); return 0; @@ -1951,19 +1944,21 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) return 0; l2cap_conn_del(hcon, bt_err(reason)); + return 0; } static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn; + struct l2cap_conn *conn = conn = hcon->l2cap_data; struct l2cap_conn_rsp rsp; struct sock *sk; int result; - if (!(conn = hcon->l2cap_data)) + if (!conn) return 0; + l = &conn->chan_list; BT_DBG("conn %p", conn); @@ -2006,13 +2001,14 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn; + struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_conn_rsp rsp; struct sock *sk; int result; - if (!(conn = hcon->l2cap_data)) + if (!conn) return 0; + l = &conn->chan_list; BT_DBG("conn %p", conn); @@ -2220,7 +2216,7 @@ static int __init l2cap_init(void) goto error; } - class_create_file(&bt_class, &class_attr_l2cap); + class_create_file(bt_class, &class_attr_l2cap); BT_INFO("L2CAP ver %s", VERSION); BT_INFO("L2CAP socket layer initialized"); @@ -2234,7 +2230,7 @@ error: static void __exit l2cap_exit(void) { - class_remove_file(&bt_class, &class_attr_l2cap); + class_remove_file(bt_class, &class_attr_l2cap); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index ee6a66979913..e5fd0cb70ae9 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -24,7 +24,6 @@ /* Bluetooth kernel library. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e99010ce8bb2..468df3b953f6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -27,7 +27,6 @@ * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -53,8 +52,10 @@ #define BT_DBG(D...) #endif -#define VERSION "1.7" +#define VERSION "1.8" +static int disable_cfc = 0; +static int channel_mtu = -1; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; static struct task_struct *rfcomm_thread; @@ -273,10 +274,10 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) { - struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); + struct rfcomm_dlc *d = kzalloc(sizeof(*d), prio); + if (!d) return NULL; - memset(d, 0, sizeof(*d)); init_timer(&d->timer); d->timer.function = rfcomm_dlc_timeout; @@ -289,6 +290,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) rfcomm_dlc_clear_state(d); BT_DBG("%p", d); + return d; } @@ -522,10 +524,10 @@ int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig) /* ---- RFCOMM sessions ---- */ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) { - struct rfcomm_session *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct rfcomm_session *s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) return NULL; - memset(s, 0, sizeof(*s)); BT_DBG("session %p sock %p", s, sock); @@ -534,7 +536,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) s->sock = sock; s->mtu = RFCOMM_DEFAULT_MTU; - s->cfc = RFCOMM_CFC_UNKNOWN; + s->cfc = disable_cfc ? RFCOMM_CFC_DISABLED : RFCOMM_CFC_UNKNOWN; /* Do not increment module usage count for listening sessions. * Otherwise we won't be able to unload the module. */ @@ -642,7 +644,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst addr.l2_family = AF_BLUETOOTH; addr.l2_psm = htobs(RFCOMM_PSM); *err = sock->ops->connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); - if (*err == 0 || *err == -EAGAIN) + if (*err == 0 || *err == -EINPROGRESS) return s; rfcomm_session_del(s); @@ -811,7 +813,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d pn->credits = 0; } - pn->mtu = htobs(d->mtu); + if (cr && channel_mtu >= 0) + pn->mtu = htobs(channel_mtu); + else + pn->mtu = htobs(d->mtu); *ptr = __fcs(buf); ptr++; @@ -1150,6 +1155,8 @@ static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) static void rfcomm_dlc_accept(struct rfcomm_dlc *d) { + struct sock *sk = d->session->sock->sk; + BT_DBG("dlc %p", d); rfcomm_send_ua(d->session, d->dlci); @@ -1159,6 +1166,9 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d) d->state_change(d, 0); rfcomm_dlc_unlock(d); + if (d->link_mode & RFCOMM_LM_MASTER) + hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); + rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); } @@ -1223,17 +1233,24 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn) BT_DBG("dlc %p state %ld dlci %d mtu %d fc 0x%x credits %d", d, d->state, d->dlci, pn->mtu, pn->flow_ctrl, pn->credits); - if (pn->flow_ctrl == 0xf0 || pn->flow_ctrl == 0xe0) { - d->cfc = s->cfc = RFCOMM_CFC_ENABLED; + if ((pn->flow_ctrl == 0xf0 && s->cfc != RFCOMM_CFC_DISABLED) || + pn->flow_ctrl == 0xe0) { + d->cfc = RFCOMM_CFC_ENABLED; d->tx_credits = pn->credits; } else { - d->cfc = s->cfc = RFCOMM_CFC_DISABLED; + d->cfc = RFCOMM_CFC_DISABLED; set_bit(RFCOMM_TX_THROTTLED, &d->flags); } + if (s->cfc == RFCOMM_CFC_UNKNOWN) + s->cfc = d->cfc; + d->priority = pn->priority; - d->mtu = s->mtu = btohs(pn->mtu); + d->mtu = btohs(pn->mtu); + + if (cr && d->mtu > s->mtu) + d->mtu = s->mtu; return 0; } @@ -1760,6 +1777,11 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s) s = rfcomm_session_add(nsock, BT_OPEN); if (s) { rfcomm_session_hold(s); + + /* We should adjust MTU on incoming sessions. + * L2CAP MTU minus UIH header and FCS. */ + s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5; + rfcomm_schedule(RFCOMM_SCHED_RX); } else sock_release(nsock); @@ -2036,7 +2058,7 @@ static int __init rfcomm_init(void) kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); - class_create_file(&bt_class, &class_attr_rfcomm_dlc); + class_create_file(bt_class, &class_attr_rfcomm_dlc); rfcomm_init_sockets(); @@ -2051,7 +2073,7 @@ static int __init rfcomm_init(void) static void __exit rfcomm_exit(void) { - class_remove_file(&bt_class, &class_attr_rfcomm_dlc); + class_remove_file(bt_class, &class_attr_rfcomm_dlc); hci_unregister_cb(&rfcomm_cb); @@ -2074,6 +2096,12 @@ static void __exit rfcomm_exit(void) module_init(rfcomm_init); module_exit(rfcomm_exit); +module_param(disable_cfc, bool, 0644); +MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control"); + +module_param(channel_mtu, int, 0644); +MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); + module_param(l2cap_mtu, uint, 0644); MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 757d2dd3b02f..220fee04e7f2 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,7 +27,6 @@ * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -945,7 +944,7 @@ int __init rfcomm_init_sockets(void) if (err < 0) goto error; - class_create_file(&bt_class, &class_attr_rfcomm); + class_create_file(bt_class, &class_attr_rfcomm); BT_INFO("RFCOMM socket layer initialized"); @@ -959,7 +958,7 @@ error: void __exit rfcomm_cleanup_sockets(void) { - class_remove_file(&bt_class, &class_attr_rfcomm); + class_remove_file(bt_class, &class_attr_rfcomm); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) BT_ERR("RFCOMM socket layer unregistration failed"); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 74368f79ee5d..26f322737db0 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -27,7 +27,6 @@ * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/tty.h> @@ -39,6 +38,7 @@ #include <linux/skbuff.h> #include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> #include <net/bluetooth/rfcomm.h> #ifndef CONFIG_BT_RFCOMM_DEBUG @@ -162,6 +162,24 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) return dev; } +static struct device *rfcomm_get_device(struct rfcomm_dev *dev) +{ + struct hci_dev *hdev; + struct hci_conn *conn; + + hdev = hci_get_route(&dev->dst, &dev->src); + if (!hdev) + return NULL; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst); + if (!conn) + return NULL; + + hci_dev_put(hdev); + + return &conn->dev; +} + static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) { struct rfcomm_dev *dev; @@ -170,10 +188,9 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) BT_DBG("id %d channel %d", req->dev_id, req->channel); - dev = kmalloc(sizeof(struct rfcomm_dev), GFP_KERNEL); + dev = kzalloc(sizeof(struct rfcomm_dev), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(struct rfcomm_dev)); write_lock_bh(&rfcomm_dev_lock); @@ -246,7 +263,7 @@ out: return err; } - tty_register_device(rfcomm_tty_driver, dev->id, NULL); + tty_register_device(rfcomm_tty_driver, dev->id, rfcomm_get_device(dev)); return dev->id; } @@ -480,12 +497,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); - if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - tty_buffer_request_room(tty, skb->len); - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); - } else - tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len); + tty_insert_flip_string(tty, skb->data, skb->len); + tty_flip_buffer_push(tty); kfree_skb(skb); } @@ -1025,13 +1038,12 @@ int rfcomm_init_ttys(void) rfcomm_tty_driver->owner = THIS_MODULE; rfcomm_tty_driver->driver_name = "rfcomm"; - rfcomm_tty_driver->devfs_name = "bluetooth/rfcomm/"; rfcomm_tty_driver->name = "rfcomm"; rfcomm_tty_driver->major = RFCOMM_TTY_MAJOR; rfcomm_tty_driver->minor_start = RFCOMM_TTY_MINOR; rfcomm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; - rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 0c2d13ad69bb..7714a2ec3854 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -24,7 +24,6 @@ /* Bluetooth SCO sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -109,17 +108,14 @@ static void sco_sock_init_timer(struct sock *sk) static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) { struct hci_dev *hdev = hcon->hdev; - struct sco_conn *conn; - - if ((conn = hcon->sco_data)) - return conn; + struct sco_conn *conn = hcon->sco_data; - if (status) + if (conn || status) return conn; - if (!(conn = kmalloc(sizeof(struct sco_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct sco_conn)); spin_lock_init(&conn->lock); @@ -135,6 +131,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) conn->mtu = 60; BT_DBG("hcon %p conn %p", hcon, conn); + return conn; } @@ -970,7 +967,7 @@ static int __init sco_init(void) goto error; } - class_create_file(&bt_class, &class_attr_sco); + class_create_file(bt_class, &class_attr_sco); BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); @@ -984,7 +981,7 @@ error: static void __exit sco_exit(void) { - class_remove_file(&bt_class, &class_attr_sco); + class_remove_file(bt_class, &class_attr_sco); if (bt_sock_unregister(BTPROTO_SCO) < 0) BT_ERR("SCO socket unregistration failed"); diff --git a/net/bridge/br.c b/net/bridge/br.c index 654401ceb2db..2994387999a8 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -13,7 +13,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2afdc7c0736c..f8dbcee80eba 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -184,6 +184,6 @@ void br_dev_setup(struct net_device *dev) dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST - | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_NO_CSUM; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 8be9f2123e54..191b861e5e53 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,16 +35,17 @@ static inline unsigned packet_length(const struct sk_buff *skb) int br_dev_queue_push_xmit(struct sk_buff *skb) { /* drop mtu oversized packets except gso */ - if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size) + if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { -#ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ - nf_bridge_maybe_copy_header(skb); -#endif - skb_push(skb, ETH_HLEN); + if (nf_bridge_maybe_copy_header(skb)) + kfree_skb(skb); + else { + skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); + dev_queue_xmit(skb); + } } return 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 07956ecf545e..b1211d5342f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,13 +386,19 @@ void br_features_recompute(struct net_bridge *br) checksum = 0; if (feature & NETIF_F_GSO) - feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO_SOFTWARE; feature |= NETIF_F_GSO; features &= feature; } - br->dev->features = features | checksum | NETIF_F_LLTX; + if (!(checksum & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + if (!(features & NETIF_F_SG)) + features &= ~NETIF_F_GSO_MASK; + + br->dev->features = features | checksum | NETIF_F_LLTX | + NETIF_F_GSO_ROBUST; } /* called with RTNL */ diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 159fb8409824..4e4119a12139 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -162,12 +162,10 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (num > BR_MAX_PORTS) num = BR_MAX_PORTS; - indices = kmalloc(num*sizeof(int), GFP_KERNEL); + indices = kcalloc(num, sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; - memset(indices, 0, num*sizeof(int)); - get_port_ifindices(br, indices, num); if (copy_to_user((void __user *)args[1], indices, num*sizeof(int))) num = -EFAULT; @@ -327,11 +325,10 @@ static int old_deviceless(void __user *uarg) if (args[2] >= 2048) return -ENOMEM; - indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL); + indices = kcalloc(args[2], sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; - memset(indices, 0, args[2]*sizeof(int)); args[2] = get_bridge_ifindices(indices, args[2]); ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 8298a5179aef..ac181be13d83 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -53,14 +53,17 @@ #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables = 1; -static int brnf_call_ip6tables = 1; -static int brnf_call_arptables = 1; -static int brnf_filter_vlan_tagged = 1; +static int brnf_call_iptables __read_mostly = 1; +static int brnf_call_ip6tables __read_mostly = 1; +static int brnf_call_arptables __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 1; #else #define brnf_filter_vlan_tagged 1 #endif +int brnf_deferred_hooks; +EXPORT_SYMBOL_GPL(brnf_deferred_hooks); + static __be16 inline vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -124,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) static inline void nf_bridge_save_header(struct sk_buff *skb) { - int header_size = 16; + int header_size = ETH_HLEN; if (skb->protocol == htons(ETH_P_8021Q)) - header_size = 18; + header_size += VLAN_HLEN; memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); } +/* + * When forwarding bridge frames, we save a copy of the original + * header before processing. + */ +int nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + int header_size = ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + header_size += VLAN_HLEN; + + err = skb_cow(skb, header_size); + if (err) + return err; + + memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); + + if (skb->protocol == htons(ETH_P_8021Q)) + __skb_push(skb, VLAN_HLEN); + return 0; +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ @@ -692,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; -#ifdef CONFIG_NETFILTER_DEBUG - /* Sometimes we get packets with NULL ->dst here (for example, - * running a dhcp client daemon triggers this). This should now - * be fixed, but let's keep the check around. */ - if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); - return NF_ACCEPT; - } -#endif - nf_bridge = skb->nf_bridge; nf_bridge->physoutdev = skb->dev; realindev = nf_bridge->physindev; @@ -761,7 +777,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP) && skb->len > skb->dev->mtu && - !skb_shinfo(skb)->gso_size) + !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); @@ -783,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, * keep the check just to be sure... */ if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " - "bad mac.raw pointer."); + "bad mac.raw pointer.\n"); goto print_error; } #endif @@ -801,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); + printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); goto print_error; } #endif @@ -838,6 +854,7 @@ print_error: } printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, skb->data); + dump_stack(); return NF_ACCEPT; #endif } @@ -890,6 +907,8 @@ static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, return NF_ACCEPT; else if (ip->version == 6 && !brnf_call_ip6tables) return NF_ACCEPT; + else if (!brnf_deferred_hooks) + return NF_ACCEPT; #endif if (hook == NF_IP_POST_ROUTING) return NF_ACCEPT; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 881d7d1a732a..8f661195d09d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/rtnetlink.h> +#include <net/netlink.h> #include "br_private.h" /* @@ -76,26 +77,24 @@ rtattr_failure: void br_ifinfo_notify(int event, struct net_bridge_port *port) { struct sk_buff *skb; - int err = -ENOMEM; + int payload = sizeof(struct ifinfomsg) + 128; + int err = -ENOBUFS; pr_debug("bridge notify event=%d\n", event); - skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128), - GFP_ATOMIC); - if (!skb) - goto err_out; - - err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); - if (err) - goto err_kfree; - - NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); - return; - -err_kfree: - kfree_skb(skb); -err_out: - netlink_set_err(rtnl, 0, RTNLGRP_LINK, err); + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_LINK, err); } /* @@ -117,12 +116,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx < s_idx) - continue; + goto cont; err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); if (err <= 0) break; +cont: ++idx; } read_unlock(&dev_base_lock); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index a7ba0cce0b46..068d8afbf0a7 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -121,7 +121,7 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) buf[1] = 0; buf[2] = 0; buf[3] = BPDU_TYPE_TCN; - br_send_bpdu(p, buf, 7); + br_send_bpdu(p, buf, 4); } /* diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index d19fc4b328dc..0aa7b9910a86 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -20,7 +20,7 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, const void *data, unsigned int datalen) { struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; - u32 _sip, *siptr, _dip, *diptr; + __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; struct sk_buff *skb = *pskb; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ee5a51761260..9f950db3b76f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -29,7 +29,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -75,6 +74,9 @@ static void ulog_send(unsigned int nlgroup) if (timer_pending(&ub->timer)) del_timer(&ub->timer); + if (!ub->skb) + return; + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3a13ed643459..3df55b2bd91d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -24,6 +24,7 @@ #include <linux/vmalloc.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> +#include <linux/mutex.h> #include <asm/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> @@ -31,36 +32,9 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -/* list_named_find */ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include <linux/netfilter_ipv4/listhelp.h> -#include <linux/mutex.h> - -#if 0 -/* use this for remote debugging - * Copyright (C) 1998 by Ori Pomerantz - * Print the string to the appropriate tty, the one - * the current task uses - */ -static void print_string(char *str) -{ - struct tty_struct *my_tty; - - /* The tty for the current task */ - my_tty = current->signal->tty; - if (my_tty != NULL) { - my_tty->driver->write(my_tty, 0, str, strlen(str)); - my_tty->driver->write(my_tty, 0, "\015\012", 2); - } -} - -#define BUGPRINT(args) print_string(args); -#else #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -#endif #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ ": out of memory: "format, ## args) /* #define MEMPRINT(format, args...) */ @@ -299,18 +273,22 @@ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, struct mutex *mutex) { - void *ret; + struct { + struct list_head list; + char name[EBT_FUNCTION_MAXNAMELEN]; + } *e; *error = mutex_lock_interruptible(mutex); if (*error != 0) return NULL; - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - mutex_unlock(mutex); + list_for_each_entry(e, head, list) { + if (strcmp(e->name, name) == 0) + return e; } - return ret; + *error = -ENOENT; + mutex_unlock(mutex); + return NULL; } #ifndef CONFIG_KMOD @@ -1064,15 +1042,19 @@ free_newinfo: int ebt_register_target(struct ebt_target *target) { + struct ebt_target *t; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_targets, target)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(t, &ebt_targets, list) { + if (strcmp(t->name, target->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&target->list, &ebt_targets); mutex_unlock(&ebt_mutex); return 0; @@ -1081,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target) void ebt_unregister_target(struct ebt_target *target) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_targets, target); + list_del(&target->list); mutex_unlock(&ebt_mutex); } int ebt_register_match(struct ebt_match *match) { + struct ebt_match *m; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_matches, match)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(m, &ebt_matches, list) { + if (strcmp(m->name, match->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&match->list, &ebt_matches); mutex_unlock(&ebt_mutex); return 0; @@ -1104,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match) void ebt_unregister_match(struct ebt_match *match) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_matches, match); + list_del(&match->list); mutex_unlock(&ebt_mutex); } int ebt_register_watcher(struct ebt_watcher *watcher) { + struct ebt_watcher *w; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_watchers, watcher)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(w, &ebt_watchers, list) { + if (strcmp(w->name, watcher->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&watcher->list, &ebt_watchers); mutex_unlock(&ebt_mutex); return 0; @@ -1127,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher) void ebt_unregister_watcher(struct ebt_watcher *watcher) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_watchers, watcher); + list_del(&watcher->list); mutex_unlock(&ebt_mutex); } int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; + struct ebt_table *t; int ret, i, countersize; if (!table || !table->table ||!table->table->entries || @@ -1179,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - if (list_named_find(&ebt_tables, table->name)) { - ret = -EEXIST; - BUGPRINT("Table name already exists\n"); - goto free_unlock; + list_for_each_entry(t, &ebt_tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; + } } /* Hold a reference count if the chains aren't empty */ @@ -1190,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_prepend(&ebt_tables, table); + list_add(&table->list, &ebt_tables); mutex_unlock(&ebt_mutex); return 0; free_unlock: @@ -1216,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table) return; } mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_tables, table); + list_del(&table->list); mutex_unlock(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { @@ -1486,7 +1479,7 @@ static int __init ebtables_init(void) int ret; mutex_lock(&ebt_mutex); - list_named_insert(&ebt_targets, &ebt_standard_target); + list_add(&ebt_standard_target.list, &ebt_targets); mutex_unlock(&ebt_mutex); if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; diff --git a/net/core/Makefile b/net/core/Makefile index e9bd2467d5a9..119568077dab 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o @@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o +obj-$(CONFIG_FIB_RULES) += fib_rules.o diff --git a/net/core/datagram.c b/net/core/datagram.c index aecddcc30401..f558c61aecc7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb) sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, goto fault; if ((unsigned short)csum_fold(csum)) goto csum_error; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); iov->iov_len -= chunk; iov->iov_base += chunk; diff --git a/net/core/dev.c b/net/core/dev.c index ea2469398bd5..4d891beab138 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -76,7 +76,6 @@ #include <asm/system.h> #include <linux/bitops.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/cpu.h> #include <linux/types.h> #include <linux/kernel.h> @@ -117,6 +116,7 @@ #include <linux/audit.h> #include <linux/dmaengine.h> #include <linux/err.h> +#include <linux/ctype.h> /* * The list of packet types we will receive (as opposed to discard) @@ -230,7 +230,7 @@ extern void netdev_unregister_sysfs(struct net_device *); * For efficiency */ -int netdev_nit; +static int netdev_nit; /* * Add a protocol ID to the list. Now that the input handler is @@ -633,14 +633,24 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas * @name: name string * * Network device names need to be valid file names to - * to allow sysfs to work + * to allow sysfs to work. We also disallow any kind of + * whitespace. */ int dev_valid_name(const char *name) { - return !(*name == '\0' - || !strcmp(name, ".") - || !strcmp(name, "..") - || strchr(name, '/')); + if (*name == '\0') + return 0; + if (strlen(name) >= IFNAMSIZ) + return 0; + if (!strcmp(name, ".") || !strcmp(name, "..")) + return 0; + + while (*name) { + if (*name == '/' || isspace(*name)) + return 0; + name++; + } + return 1; } /** @@ -1158,14 +1168,17 @@ EXPORT_SYMBOL(netif_device_attach); * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. */ -int skb_checksum_help(struct sk_buff *skb, int inward) +int skb_checksum_help(struct sk_buff *skb) { unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) { - skb->ip_summed = CHECKSUM_NONE; - goto out; + if (skb->ip_summed == CHECKSUM_COMPLETE) + goto out_set_summed; + + if (unlikely(skb_shinfo(skb)->gso_size)) { + /* Let GSO fix up the checksum. */ + goto out_set_summed; } if (skb_cloned(skb)) { @@ -1182,6 +1195,8 @@ int skb_checksum_help(struct sk_buff *skb, int inward) BUG_ON(skb->csum + 2 > offset); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + +out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: return ret; @@ -1190,32 +1205,50 @@ out: /** * skb_gso_segment - Perform segmentation on skb. * @skb: buffer to segment - * @sg: whether scatter-gather is supported on the target. + * @features: features for the output path (see dev->features) * * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; int type = skb->protocol; + int err; BUG_ON(skb_shinfo(skb)->frag_list); - BUG_ON(skb->ip_summed != CHECKSUM_HW); skb->mac.raw = skb->data; skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - segs = ptype->gso_segment(skb, sg); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + err = ptype->gso_send_check(skb); + segs = ERR_PTR(err); + if (err || skb_gso_ok(skb, features)) + break; + __skb_push(skb, skb->data - skb->nh.raw); + } + segs = ptype->gso_segment(skb, features); break; } } rcu_read_unlock(); + __skb_push(skb, skb->data - skb->mac.raw); + return segs; } @@ -1234,7 +1267,6 @@ void netdev_rx_csum_fault(struct net_device *dev) EXPORT_SYMBOL(netdev_rx_csum_fault); #endif -#ifdef CONFIG_HIGHMEM /* Actually, we should eliminate this check as soon as we know, that: * 1. IOMMU is present and allows to map all the memory. * 2. No high memory really exists on this machine. @@ -1242,6 +1274,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { +#ifdef CONFIG_HIGHMEM int i; if (dev->features & NETIF_F_HIGHDMA) @@ -1251,11 +1284,9 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (PageHighMem(skb_shinfo(skb)->frags[i].page)) return 1; +#endif return 0; } -#else -#define illegal_highdma(dev, skb) (0) -#endif struct dev_gso_cb { void (*destructor)(struct sk_buff *skb); @@ -1291,9 +1322,15 @@ static int dev_gso_segment(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct sk_buff *segs; + int features = dev->features & ~(illegal_highdma(dev, skb) ? + NETIF_F_SG : 0); + + segs = skb_gso_segment(skb, features); + + /* Verifying header integrity only. */ + if (!segs) + return 0; - segs = skb_gso_segment(skb, dev->features & NETIF_F_SG && - !illegal_highdma(dev, skb)); if (unlikely(IS_ERR(segs))) return PTR_ERR(segs); @@ -1310,13 +1347,17 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (netdev_nit) dev_queue_xmit_nit(skb, dev); - if (!netif_needs_gso(dev, skb)) - return dev->hard_start_xmit(skb, dev); + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } - if (unlikely(dev_gso_segment(skb))) - goto out_kfree_skb; + return dev->hard_start_xmit(skb, dev); } +gso: do { struct sk_buff *nskb = skb->next; int rc; @@ -1325,9 +1366,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) nskb->next = NULL; rc = dev->hard_start_xmit(nskb, dev); if (unlikely(rc)) { + nskb->next = skb->next; skb->next = nskb; return rc; } + if (unlikely(netif_queue_stopped(dev) && skb->next)) + return NETDEV_TX_BUSY; } while (skb->next); skb->destructor = DEV_GSO_CB(skb)->destructor; @@ -1402,11 +1446,11 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_PARTIAL && (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto out_kfree_skb; gso: @@ -1436,14 +1480,16 @@ gso: if (q->enqueue) { /* Grab device queue */ spin_lock(&dev->queue_lock); + q = dev->qdisc; + if (q->enqueue) { + rc = q->enqueue(skb, q); + qdisc_run(dev); + spin_unlock(&dev->queue_lock); - rc = q->enqueue(skb, q); - - qdisc_run(dev); - + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; + } spin_unlock(&dev->queue_lock); - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; } /* The device has no queue. Common case for software devices: @@ -1586,26 +1632,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) struct net_device *dev = skb->dev; if (dev->master) { - /* - * On bonding slaves other than the currently active - * slave, suppress duplicates except for 802.3ad - * ETH_P_SLOW and alb non-mcast/bcast. - */ - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if (dev->master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - goto keep; - } - - if (dev->master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - goto keep; - + if (skb_bond_should_drop(skb)) { kfree_skb(skb); return NULL; } -keep: skb->dev = dev->master; } @@ -1712,7 +1742,7 @@ static int ing_filter(struct sk_buff *skb) if (dev->qdisc_ingress) { __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { - printk("Redir loop detected Dropping packet (%s->%s)\n", + printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n", skb->input_dev->name, skb->dev->name); return TC_ACT_SHOT; } @@ -2907,7 +2937,7 @@ int register_netdevice(struct net_device *dev) /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && !(dev->features & NETIF_F_ALL_CSUM)) { - printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", dev->name); dev->features &= ~NETIF_F_SG; } @@ -2915,7 +2945,7 @@ int register_netdevice(struct net_device *dev) /* TSO requires that SG is present as well. */ if ((dev->features & NETIF_F_TSO) && !(dev->features & NETIF_F_SG)) { - printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", dev->name); dev->features &= ~NETIF_F_TSO; } @@ -3165,13 +3195,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, struct net_device *dev; int alloc_size; + BUG_ON(strlen(name) >= sizeof(dev->name)); + /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { - printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); return NULL; } @@ -3386,12 +3418,9 @@ static void net_dma_rebalance(void) unsigned int cpu, i, n; struct dma_chan *chan; - lock_cpu_hotplug(); - if (net_dma_count == 0) { for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); - unlock_cpu_hotplug(); + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; } @@ -3404,15 +3433,13 @@ static void net_dma_rebalance(void) + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); while(n) { - per_cpu(softnet_data.net_dma, cpu) = chan; + per_cpu(softnet_data, cpu).net_dma = chan; cpu = next_cpu(cpu, cpu_online_map); n--; } i++; } rcu_read_unlock(); - - unlock_cpu_hotplug(); } /** diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index c57d887da2ef..b22648d04d36 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -21,8 +21,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> -#include <linux/module.h> +#include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> diff --git a/net/core/dst.c b/net/core/dst.c index 470c05bc4cb2..1a5e49da0e77 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_INC; dst_gc_timer_expires = DST_GC_MIN; } - dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - add_timer(&dst_gc_timer); + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 27ce1683caf5..87dc556fd9d6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -143,7 +143,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; if (!ops->get_drvinfo) return -EOPNOTSUPP; @@ -169,7 +169,7 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; void *regbuf; int reglen, ret; @@ -282,7 +282,7 @@ static int ethtool_get_link(struct net_device *dev, void __user *useraddr) static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -327,7 +327,7 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -437,7 +437,7 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam; - if (!dev->ethtool_ops->get_pauseparam) + if (!dev->ethtool_ops->set_pauseparam) return -EOPNOTSUPP; if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) @@ -640,7 +640,7 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; int ret; @@ -673,7 +673,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; @@ -733,7 +733,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) { struct ethtool_stats stats; - struct ethtool_ops *ops = dev->ethtool_ops; + const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; int ret; @@ -806,13 +806,6 @@ int dev_ethtool(struct ifreq *ifr) int rc; unsigned long old_features; - /* - * XXX: This can be pushed down into the ethtool_* handlers that - * need it. Keep existing behaviour for the moment. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (!dev || !netif_device_present(dev)) return -ENODEV; @@ -822,6 +815,27 @@ int dev_ethtool(struct ifreq *ifr) if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; + /* Allow some commands to be done by anyone */ + switch(ethcmd) { + case ETHTOOL_GDRVINFO: + case ETHTOOL_GMSGLVL: + case ETHTOOL_GCOALESCE: + case ETHTOOL_GRINGPARAM: + case ETHTOOL_GPAUSEPARAM: + case ETHTOOL_GRXCSUM: + case ETHTOOL_GTXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GSTRINGS: + case ETHTOOL_GTSO: + case ETHTOOL_GPERMADDR: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + break; + default: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + } + if(dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; @@ -947,6 +961,10 @@ int dev_ethtool(struct ifreq *ifr) return rc; ioctl: + /* Keep existing behaviour for the moment. */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (dev->do_ioctl) return dev->do_ioctl(dev, ifr, SIOCETHTOOL); return -EOPNOTSUPP; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c new file mode 100644 index 000000000000..a99d87d82b7f --- /dev/null +++ b/net/core/fib_rules.c @@ -0,0 +1,421 @@ +/* + * net/core/fib_rules.c Generic Routing Rules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <net/fib_rules.h> + +static LIST_HEAD(rules_ops); +static DEFINE_SPINLOCK(rules_mod_lock); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid); + +static struct fib_rules_ops *lookup_rules_ops(int family) +{ + struct fib_rules_ops *ops; + + rcu_read_lock(); + list_for_each_entry_rcu(ops, &rules_ops, list) { + if (ops->family == family) { + if (!try_module_get(ops->owner)) + ops = NULL; + rcu_read_unlock(); + return ops; + } + } + rcu_read_unlock(); + + return NULL; +} + +static void rules_ops_put(struct fib_rules_ops *ops) +{ + if (ops) + module_put(ops->owner); +} + +int fib_rules_register(struct fib_rules_ops *ops) +{ + int err = -EEXIST; + struct fib_rules_ops *o; + + if (ops->rule_size < sizeof(struct fib_rule)) + return -EINVAL; + + if (ops->match == NULL || ops->configure == NULL || + ops->compare == NULL || ops->fill == NULL || + ops->action == NULL) + return -EINVAL; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) + if (ops->family == o->family) + goto errout; + + list_add_tail_rcu(&ops->list, &rules_ops); + err = 0; +errout: + spin_unlock(&rules_mod_lock); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_register); + +static void cleanup_ops(struct fib_rules_ops *ops) +{ + struct fib_rule *rule, *tmp; + + list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_del_rcu(&rule->list); + fib_rule_put(rule); + } +} + +int fib_rules_unregister(struct fib_rules_ops *ops) +{ + int err = 0; + struct fib_rules_ops *o; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + cleanup_ops(ops); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&rules_mod_lock); + + synchronize_rcu(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_unregister); + +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, + int flags, struct fib_lookup_arg *arg) +{ + struct fib_rule *rule; + int err; + + rcu_read_lock(); + + list_for_each_entry_rcu(rule, ops->rules_list, list) { + if (rule->ifindex && (rule->ifindex != fl->iif)) + continue; + + if (!ops->match(rule, fl, flags)) + continue; + + err = ops->action(rule, fl, flags, arg); + if (err != -EAGAIN) { + fib_rule_get(rule); + arg->rule = rule; + goto out; + } + } + + err = -ENETUNREACH; +out: + rcu_read_unlock(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_lookup); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *r, *last = NULL; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + rule = kzalloc(ops->rule_size, GFP_KERNEL); + if (rule == NULL) { + err = -ENOMEM; + goto errout; + } + + if (tb[FRA_PRIORITY]) + rule->pref = nla_get_u32(tb[FRA_PRIORITY]); + + if (tb[FRA_IFNAME]) { + struct net_device *dev; + + rule->ifindex = -1; + nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); + dev = __dev_get_by_name(rule->ifname); + if (dev) + rule->ifindex = dev->ifindex; + } + + rule->action = frh->action; + rule->flags = frh->flags; + rule->table = frh_get_table(frh, tb); + + if (!rule->pref && ops->default_pref) + rule->pref = ops->default_pref(); + + err = ops->configure(rule, skb, nlh, frh, tb); + if (err < 0) + goto errout_free; + + list_for_each_entry(r, ops->rules_list, list) { + if (r->pref > rule->pref) + break; + last = r; + } + + fib_rule_get(rule); + + if (last) + list_add_rcu(&rule->list, &last->list); + else + list_add_rcu(&rule->list, ops->rules_list); + + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + rules_ops_put(ops); + return 0; + +errout_free: + kfree(rule); +errout: + rules_ops_put(ops); + return err; +} + +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + list_for_each_entry(rule, ops->rules_list, list) { + if (frh->action && (frh->action != rule->action)) + continue; + + if (frh->table && (frh_get_table(frh, tb) != rule->table)) + continue; + + if (tb[FRA_PRIORITY] && + (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) + continue; + + if (tb[FRA_IFNAME] && + nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + continue; + + if (!ops->compare(rule, frh, tb)) + continue; + + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; + } + + list_del_rcu(&rule->list); + synchronize_rcu(); + notify_rule_change(RTM_DELRULE, rule, ops, nlh, + NETLINK_CB(skb).pid); + fib_rule_put(rule); + rules_ops_put(ops); + return 0; + } + + err = -ENOENT; +errout: + rules_ops_put(ops); + return err; +} + +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, + u32 pid, u32 seq, int type, int flags, + struct fib_rules_ops *ops) +{ + struct nlmsghdr *nlh; + struct fib_rule_hdr *frh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); + if (nlh == NULL) + return -1; + + frh = nlmsg_data(nlh); + frh->table = rule->table; + NLA_PUT_U32(skb, FRA_TABLE, rule->table); + frh->res1 = 0; + frh->res2 = 0; + frh->action = rule->action; + frh->flags = rule->flags; + + if (rule->ifname[0]) + NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + + if (rule->pref) + NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + + if (ops->fill(rule, skb, nlh, frh) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); +} + +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) +{ + int idx = 0; + struct fib_rule *rule; + struct fib_rules_ops *ops; + + ops = lookup_rules_ops(family); + if (ops == NULL) + return -EAFNOSUPPORT; + + rcu_read_lock(); + list_for_each_entry(rule, ops->rules_list, list) { + if (idx < cb->args[0]) + goto skip; + + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops) < 0) + break; +skip: + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + rules_ops_put(ops); + + return skb->len; +} + +EXPORT_SYMBOL_GPL(fib_rules_dump); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(ops->nlgroup, err); +} + +static void attach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) { + if (rule->ifindex == -1 && + strcmp(dev->name, rule->ifname) == 0) + rule->ifindex = dev->ifindex; + } +} + +static void detach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) + if (rule->ifindex == dev->ifindex) + rule->ifindex = -1; +} + + +static int fib_rules_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + rcu_read_lock(); + + switch (event) { + case NETDEV_REGISTER: + list_for_each_entry(ops, &rules_ops, list) + attach_rules(ops->rules_list, dev); + break; + + case NETDEV_UNREGISTER: + list_for_each_entry(ops, &rules_ops, list) + detach_rules(ops->rules_list, dev); + break; + } + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block fib_rules_notifier = { + .notifier_call = fib_rules_event, +}; + +static int __init fib_rules_init(void) +{ + return register_netdevice_notifier(&fib_rules_notifier); +} + +subsys_initcall(fib_rules_init); diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf6..6732782a5a40 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - spin_lock_bh(&sk->sk_lock.slock); - old_fp = sk->sk_filter; - sk->sk_filter = fp; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); fp = old_fp; } diff --git a/net/core/flow.c b/net/core/flow.c index 2191af5f26ac..f23e7e386543 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -32,7 +32,6 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; - u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && - fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; - fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -226,7 +223,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, sk_sid, family, dir, &obj, &obj_ref); + resolver(key, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); @@ -346,12 +343,8 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - - if (!flow_cachep) - panic("NET: failed to allocate flow cache slab\n"); - flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 0f37266411b5..4b36114744c5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -11,7 +11,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 50a8c73caf97..8ce8c471d868 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -15,7 +15,6 @@ * Harald Welte Add neighbour cache statistics like rtstat */ -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -30,6 +29,8 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> +#include <net/netevent.h> +#include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> @@ -755,6 +756,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); + notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -763,6 +765,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); + notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -820,6 +823,8 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -884,7 +889,7 @@ out_unlock_bh: return rc; } -static __inline__ void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = @@ -927,9 +932,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; int update_isrouter = 0; @@ -949,9 +952,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify = old & NUD_VALID; -#endif goto out; } @@ -1023,9 +1024,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -1057,6 +1056,9 @@ out: (neigh->flags & ~NTF_ROUTER); } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); @@ -1077,7 +1079,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, } static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - u16 protocol) + __be16 protocol) { struct hh_cache *hh; struct net_device *dev = dst->dev; @@ -1337,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) neigh_rand_reach_time(tbl->parms.base_reachable_time); if (!tbl->kmem_cachep) - tbl->kmem_cachep = kmem_cache_create(tbl->id, - tbl->entry_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!tbl->kmem_cachep) - panic("cannot create neighbour cache"); - + tbl->kmem_cachep = + kmem_cache_create(tbl->id, tbl->entry_size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); @@ -1431,53 +1429,70 @@ int neigh_table_clear(struct neigh_table *tbl) kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; + free_percpu(tbl->stats); + tbl->stats = NULL; + return 0; } int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *dst_attr; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err = -EINVAL; + + if (nlmsg_len(nlh) < sizeof(*ndm)) + goto out; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); + if (dst_attr == NULL) goto out; + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + } + read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct rtattr *dst_attr = nda[NDA_DST - 1]; - struct neighbour *n; + struct neighbour *neigh; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < tbl->key_len) goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); + err = pneigh_delete(tbl, nla_data(dst_attr), dev); goto out_dev_put; } - if (!dev) - goto out; + if (dev == NULL) + goto out_dev_put; - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { - err = neigh_update(n, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); - neigh_release(n); + neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; + goto out_dev_put; } + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); @@ -1487,76 +1502,93 @@ out: int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) goto out; + err = -EINVAL; + if (tb[NDA_DST] == NULL) + goto out; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + + if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) + goto out_dev_put; + } + read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; - struct rtattr *dst_attr = nda[NDA_DST - 1]; - int override = 1; - struct neighbour *n; + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; + struct neighbour *neigh; + void *dst, *lladdr; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < tbl->key_len) goto out_dev_put; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (ndm->ndm_flags & NTF_PROXY) { + struct pneigh_entry *pn; + err = -ENOBUFS; - if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) + pn = pneigh_lookup(tbl, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; err = 0; + } goto out_dev_put; } - err = -EINVAL; - if (!dev) - goto out; - if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len) + if (dev == NULL) goto out_dev_put; + + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; + goto out_dev_put; + } - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { - if (nlh->nlmsg_flags & NLM_F_EXCL) { - err = -EEXIST; - neigh_release(n); + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); goto out_dev_put; } - - override = nlh->nlmsg_flags & NLM_F_REPLACE; - } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - err = -ENOENT; - goto out_dev_put; } else { - n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); - if (IS_ERR(n)) { - err = PTR_ERR(n); + if (nlh->nlmsg_flags & NLM_F_EXCL) { + err = -EEXIST; + neigh_release(neigh); goto out_dev_put; } - } - err = neigh_update(n, - lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, - ndm->ndm_state, - (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | - NEIGH_UPDATE_F_ADMIN); + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) + flags &= ~NEIGH_UPDATE_F_OVERRIDE; + } - neigh_release(n); + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); @@ -1566,56 +1598,59 @@ out: static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { - struct rtattr *nest = NULL; - - nest = RTA_NEST(skb, NDTA_PARMS); + struct nlattr *nest; + + nest = nla_nest_start(skb, NDTA_PARMS); + if (nest == NULL) + return -ENOBUFS; if (parms->dev) - RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); - - RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); - RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); - RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); - RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); - RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); - RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); - RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, + NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); + + NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); + NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); + NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); + NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); + NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); + NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); + NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); + NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, parms->base_reachable_time); - RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); - RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); - RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); - RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); - RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); - RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); + NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); + NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); + NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); + NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); + NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); + NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); - return RTA_NEST_END(skb, nest); + return nla_nest_end(skb, nest); -rtattr_failure: - return RTA_NEST_CANCEL(skb, nest); +nla_put_failure: + return nla_nest_cancel(skb, nest); } -static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, - struct netlink_callback *cb) +static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, + u32 pid, u32 seq, int type, int flags) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), - NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; - ndtmsg = NLMSG_DATA(nlh); + ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); - RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); - RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); - RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); - RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); + NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); + NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); + NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); + NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); + NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); { unsigned long now = jiffies; @@ -1634,7 +1669,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); } { @@ -1659,55 +1694,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, ndst.ndts_forced_gc_runs += st->forced_gc_runs; } - RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); } BUG_ON(tbl->parms.dev); if (neightbl_fill_parms(skb, &tbl->parms) < 0) - goto rtattr_failure; + goto nla_put_failure; read_unlock_bh(&tbl->lock); - return NLMSG_END(skb, nlh); + return nlmsg_end(skb, nlh); -rtattr_failure: +nla_put_failure: read_unlock_bh(&tbl->lock); - return NLMSG_CANCEL(skb, nlh); - -nlmsg_failure: - return -1; + return nlmsg_cancel(skb, nlh); } -static int neightbl_fill_param_info(struct neigh_table *tbl, +static int neightbl_fill_param_info(struct sk_buff *skb, + struct neigh_table *tbl, struct neigh_parms *parms, - struct sk_buff *skb, - struct netlink_callback *cb) + u32 pid, u32 seq, int type, + unsigned int flags) { struct ndtmsg *ndtmsg; struct nlmsghdr *nlh; - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), - NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; - ndtmsg = NLMSG_DATA(nlh); + ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); - if (neightbl_fill_parms(skb, parms) < 0) - goto rtattr_failure; + if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || + neightbl_fill_parms(skb, parms) < 0) + goto errout; read_unlock_bh(&tbl->lock); - return NLMSG_END(skb, nlh); - -rtattr_failure: + return nlmsg_end(skb, nlh); +errout: read_unlock_bh(&tbl->lock); - return NLMSG_CANCEL(skb, nlh); - -nlmsg_failure: - return -1; + return nlmsg_cancel(skb, nlh); } static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, @@ -1723,28 +1753,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, return NULL; } +static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { + [NDTA_NAME] = { .type = NLA_STRING }, + [NDTA_THRESH1] = { .type = NLA_U32 }, + [NDTA_THRESH2] = { .type = NLA_U32 }, + [NDTA_THRESH3] = { .type = NLA_U32 }, + [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, + [NDTA_PARMS] = { .type = NLA_NESTED }, +}; + +static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { + [NDTPA_IFINDEX] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, + [NDTPA_APP_PROBES] = { .type = NLA_U32 }, + [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, + [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, + [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, + [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, + [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, + [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, + [NDTPA_LOCKTIME] = { .type = NLA_U64 }, +}; + int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct neigh_table *tbl; - struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); - struct rtattr **tb = arg; - int err = -EINVAL; + struct ndtmsg *ndtmsg; + struct nlattr *tb[NDTA_MAX+1]; + int err; - if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy); + if (err < 0) + goto errout; + + if (tb[NDTA_NAME] == NULL) { + err = -EINVAL; + goto errout; + } + ndtmsg = nlmsg_data(nlh); read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; - if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) break; } if (tbl == NULL) { err = -ENOENT; - goto errout; + goto errout_locked; } /* @@ -1753,165 +1816,178 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) */ write_lock_bh(&tbl->lock); - if (tb[NDTA_THRESH1 - 1]) - tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); - - if (tb[NDTA_THRESH2 - 1]) - tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]); - - if (tb[NDTA_THRESH3 - 1]) - tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]); - - if (tb[NDTA_GC_INTERVAL - 1]) - tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]); - - if (tb[NDTA_PARMS - 1]) { - struct rtattr *tbp[NDTPA_MAX]; + if (tb[NDTA_PARMS]) { + struct nlattr *tbp[NDTPA_MAX+1]; struct neigh_parms *p; - u32 ifindex = 0; + int i, ifindex = 0; - if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0) - goto rtattr_failure; + err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], + nl_ntbl_parm_policy); + if (err < 0) + goto errout_tbl_lock; - if (tbp[NDTPA_IFINDEX - 1]) - ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); + if (tbp[NDTPA_IFINDEX]) + ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); p = lookup_neigh_params(tbl, ifindex); if (p == NULL) { err = -ENOENT; - goto rtattr_failure; + goto errout_tbl_lock; } - - if (tbp[NDTPA_QUEUE_LEN - 1]) - p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]); - - if (tbp[NDTPA_PROXY_QLEN - 1]) - p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]); - - if (tbp[NDTPA_APP_PROBES - 1]) - p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]); - - if (tbp[NDTPA_UCAST_PROBES - 1]) - p->ucast_probes = - RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]); - if (tbp[NDTPA_MCAST_PROBES - 1]) - p->mcast_probes = - RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]); - - if (tbp[NDTPA_BASE_REACHABLE_TIME - 1]) - p->base_reachable_time = - RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]); + for (i = 1; i <= NDTPA_MAX; i++) { + if (tbp[i] == NULL) + continue; - if (tbp[NDTPA_GC_STALETIME - 1]) - p->gc_staletime = - RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]); + switch (i) { + case NDTPA_QUEUE_LEN: + p->queue_len = nla_get_u32(tbp[i]); + break; + case NDTPA_PROXY_QLEN: + p->proxy_qlen = nla_get_u32(tbp[i]); + break; + case NDTPA_APP_PROBES: + p->app_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_UCAST_PROBES: + p->ucast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_MCAST_PROBES: + p->mcast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_BASE_REACHABLE_TIME: + p->base_reachable_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_GC_STALETIME: + p->gc_staletime = nla_get_msecs(tbp[i]); + break; + case NDTPA_DELAY_PROBE_TIME: + p->delay_probe_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_RETRANS_TIME: + p->retrans_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_ANYCAST_DELAY: + p->anycast_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_PROXY_DELAY: + p->proxy_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_LOCKTIME: + p->locktime = nla_get_msecs(tbp[i]); + break; + } + } + } - if (tbp[NDTPA_DELAY_PROBE_TIME - 1]) - p->delay_probe_time = - RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]); + if (tb[NDTA_THRESH1]) + tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); - if (tbp[NDTPA_RETRANS_TIME - 1]) - p->retrans_time = - RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]); + if (tb[NDTA_THRESH2]) + tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); - if (tbp[NDTPA_ANYCAST_DELAY - 1]) - p->anycast_delay = - RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]); + if (tb[NDTA_THRESH3]) + tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); - if (tbp[NDTPA_PROXY_DELAY - 1]) - p->proxy_delay = - RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]); - - if (tbp[NDTPA_LOCKTIME - 1]) - p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]); - } + if (tb[NDTA_GC_INTERVAL]) + tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); err = 0; -rtattr_failure: +errout_tbl_lock: write_unlock_bh(&tbl->lock); -errout: +errout_locked: read_unlock(&neigh_tbl_lock); +errout: return err; } int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - int idx, family; - int s_idx = cb->args[0]; + int family, tidx, nidx = 0; + int tbl_skip = cb->args[0]; + int neigh_skip = cb->args[1]; struct neigh_table *tbl; - family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) { + for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { struct neigh_parms *p; - if (idx < s_idx || (family && tbl->family != family)) + if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(tbl, skb, cb) <= 0) + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) break; - for (++idx, p = tbl->parms.next; p; p = p->next, idx++) { - if (idx < s_idx) + for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { + if (nidx < neigh_skip) continue; - if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0) + if (neightbl_fill_param_info(skb, tbl, p, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) goto out; } + neigh_skip = 0; } out: read_unlock(&neigh_tbl_lock); - cb->args[0] = idx; + cb->args[0] = tidx; + cb->args[1] = nidx; return skb->len; } -static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, - u32 pid, u32 seq, int event, unsigned int flags) +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, + u32 pid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; - unsigned char *b = skb->tail; struct nda_cacheinfo ci; - int locked = 0; - u32 probes; - struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, - sizeof(struct ndmsg), flags); - struct ndmsg *ndm = NLMSG_DATA(nlh); + struct nlmsghdr *nlh; + struct ndmsg *ndm; - ndm->ndm_family = n->ops->family; + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = n->flags; - ndm->ndm_type = n->type; - ndm->ndm_ifindex = n->dev->ifindex; - RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); - read_lock_bh(&n->lock); - locked = 1; - ndm->ndm_state = n->nud_state; - if (n->nud_state & NUD_VALID) - RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); - ci.ndm_used = now - n->used; - ci.ndm_confirmed = now - n->confirmed; - ci.ndm_updated = now - n->updated; - ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; - probes = atomic_read(&n->probes); - read_unlock_bh(&n->lock); - locked = 0; - RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); - RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + ndm->ndm_flags = neigh->flags; + ndm->ndm_type = neigh->type; + ndm->ndm_ifindex = neigh->dev->ifindex; + + NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); -nlmsg_failure: -rtattr_failure: - if (locked) - read_unlock_bh(&n->lock); - skb_trim(skb, b - skb->data); - return -1; + read_lock_bh(&neigh->lock); + ndm->ndm_state = neigh->nud_state; + if ((neigh->nud_state & NUD_VALID) && + nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } + + ci.ndm_used = now - neigh->used; + ci.ndm_confirmed = now - neigh->confirmed; + ci.ndm_updated = now - neigh->updated; + ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; + read_unlock_bh(&neigh->lock); + + NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); + NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } @@ -1955,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int t, family, s_t; read_lock(&neigh_tbl_lock); - family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; s_t = cb->args[0]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { @@ -2334,41 +2410,35 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_ARPD -void neigh_app_ns(struct neighbour *n) +static void __neigh_notify(struct neighbour *n, int type, int flags) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + struct sk_buff *skb; + int err = -ENOBUFS; - if (!skb) - return; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) + goto errout; - if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) { + err = neigh_fill_info(skb, n, 0, 0, type, flags); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - nlh = (struct nlmsghdr *)skb->data; - nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_NEIGH, err); } -static void neigh_app_notify(struct neighbour *n) +void neigh_app_ns(struct neighbour *n) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); - - if (!skb) - return; + __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); +} - if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) { - kfree_skb(skb); - return; - } - nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); +static void neigh_app_notify(struct neighbour *n) +{ + __neigh_notify(n, RTM_NEWNEIGH, 0); } #endif /* CONFIG_ARPD */ @@ -2382,7 +2452,7 @@ static struct neigh_sysctl_table { ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; ctl_table neigh_root_dir[2]; -} neigh_sysctl_template = { +} neigh_sysctl_template __read_mostly = { .neigh_vars = { { .ctl_name = NET_NEIGH_MCAST_SOLICIT, @@ -2655,7 +2725,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p) #endif /* CONFIG_SYSCTL */ EXPORT_SYMBOL(__neigh_event_send); -EXPORT_SYMBOL(neigh_add); EXPORT_SYMBOL(neigh_changeaddr); EXPORT_SYMBOL(neigh_compat_output); EXPORT_SYMBOL(neigh_connected_output); @@ -2675,11 +2744,8 @@ EXPORT_SYMBOL(neigh_table_clear); EXPORT_SYMBOL(neigh_table_init); EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); -EXPORT_SYMBOL(neigh_update_hhs); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); -EXPORT_SYMBOL(neightbl_dump_info); -EXPORT_SYMBOL(neightbl_set); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 47a6fceb6771..f47f319bb7dc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -10,7 +10,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_arp.h> @@ -345,8 +344,6 @@ static ssize_t wireless_show(struct class_device *cd, char *buf, if(dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) iw = dev->wireless_handlers->get_wireless_stats(dev); - else if (dev->get_wireless_stats) - iw = dev->get_wireless_stats(dev); if (iw != NULL) ret = (*format)(iw, buf); } @@ -466,8 +463,7 @@ int netdev_register_sysfs(struct net_device *net) *groups++ = &netstat_group; #ifdef WIRELESS_EXT - if (net->get_wireless_stats - || (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)) + if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif diff --git a/net/core/netevent.c b/net/core/netevent.c new file mode 100644 index 000000000000..35d02c38554e --- /dev/null +++ b/net/core/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker <tom@opengridcomputing.com> + * Steve Wise <swise@opengridcomputing.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include <linux/rtnetlink.h> +#include <linux/notifier.h> + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9cb781830380..ead5920c26d6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -54,6 +54,7 @@ static atomic_t trapped; sizeof(struct iphdr) + sizeof(struct ethhdr)) static void zap_completion_queue(void); +static void arp_reply(struct sk_buff *skb); static void queue_process(void *p) { @@ -109,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !(u16)csum_fold(csum_add(psum, skb->csum))) return 0; @@ -153,6 +154,22 @@ static void poll_napi(struct netpoll *np) } } +static void service_arp_queue(struct netpoll_info *npi) +{ + struct sk_buff *skb; + + if (unlikely(!npi)) + return; + + skb = skb_dequeue(&npi->arp_tx); + + while (skb != NULL) { + arp_reply(skb); + skb = skb_dequeue(&npi->arp_tx); + } + return; +} + void netpoll_poll(struct netpoll *np) { if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) @@ -163,6 +180,8 @@ void netpoll_poll(struct netpoll *np) if (np->dev->poll) poll_napi(np); + service_arp_queue(np->dev->npinfo); + zap_completion_queue(); } @@ -279,14 +298,10 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) * network drivers do not expect to be called if the queue is * stopped. */ - if (netif_queue_stopped(np->dev)) { - netif_tx_unlock(np->dev); - netpoll_poll(np); - udelay(50); - continue; - } + status = NETDEV_TX_BUSY; + if (!netif_queue_stopped(np->dev)) + status = np->dev->hard_start_xmit(skb, np->dev); - status = np->dev->hard_start_xmit(skb, np->dev); netif_tx_unlock(np->dev); /* success */ @@ -446,7 +461,9 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->npinfo->rx_np; + struct netpoll_info *npi = skb->dev->npinfo; + struct netpoll *np = npi->rx_np; + if (!np) goto out; @@ -456,7 +473,7 @@ int __netpoll_rx(struct sk_buff *skb) /* check if netpoll clients need ARP */ if (skb->protocol == __constant_htons(ETH_P_ARP) && atomic_read(&trapped)) { - arp_reply(skb); + skb_queue_tail(&npi->arp_tx, skb); return 1; } @@ -651,6 +668,7 @@ int netpoll_setup(struct netpoll *np) npinfo->poll_owner = -1; npinfo->tries = MAX_RETRIES; spin_lock_init(&npinfo->rx_lock); + skb_queue_head_init(&npinfo->arp_tx); } else npinfo = ndev->npinfo; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 67ed14ddabd2..dd023fd28304 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -109,6 +109,8 @@ * * MPLS support by Steven Whitehouse <steve@chygwyn.com> * + * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -137,6 +139,7 @@ #include <linux/inetdevice.h> #include <linux/rtnetlink.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -157,7 +160,7 @@ #include <asm/div64.h> /* do_div */ #include <asm/timex.h> -#define VERSION "pktgen v2.67: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) @@ -178,6 +181,8 @@ #define F_TXSIZE_RND (1<<6) /* Transmit size is random */ #define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ #define F_MPLS_RND (1<<8) /* Random MPLS labels */ +#define F_VID_RND (1<<9) /* Random VLAN ID */ +#define F_SVID_RND (1<<10) /* Random SVLAN ID */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -198,6 +203,9 @@ static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 +#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4) +#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) + struct flow_state { __u32 cur_daddr; int count; @@ -284,10 +292,23 @@ struct pktgen_dev { __u16 udp_dst_min; /* inclusive, dest UDP port */ __u16 udp_dst_max; /* exclusive, dest UDP port */ + /* DSCP + ECN */ + __u8 tos; /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */ + __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */ + /* MPLS */ unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ __be32 labels[MAX_MPLS_LABELS]; + /* VLAN/SVLAN (802.1Q/Q-in-Q) */ + __u8 vlan_p; + __u8 vlan_cfi; + __u16 vlan_id; /* 0xffff means no vlan tag */ + + __u8 svlan_p; + __u8 svlan_cfi; + __u16 svlan_id; /* 0xffff means no svlan tag */ + __u32 src_mac_count; /* How many MACs to iterate through */ __u32 dst_mac_count; /* How many MACs to iterate through */ @@ -644,6 +665,24 @@ static int pktgen_if_show(struct seq_file *seq, void *v) i == pkt_dev->nr_labels-1 ? "\n" : ", "); } + if (pkt_dev->vlan_id != 0xffff) { + seq_printf(seq, " vlan_id: %u vlan_p: %u vlan_cfi: %u\n", + pkt_dev->vlan_id, pkt_dev->vlan_p, pkt_dev->vlan_cfi); + } + + if (pkt_dev->svlan_id != 0xffff) { + seq_printf(seq, " svlan_id: %u vlan_p: %u vlan_cfi: %u\n", + pkt_dev->svlan_id, pkt_dev->svlan_p, pkt_dev->svlan_cfi); + } + + if (pkt_dev->tos) { + seq_printf(seq, " tos: 0x%02x\n", pkt_dev->tos); + } + + if (pkt_dev->traffic_class) { + seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); + } + seq_printf(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) @@ -673,6 +712,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_MACDST_RND) seq_printf(seq, "MACDST_RND "); + if (pkt_dev->flags & F_VID_RND) + seq_printf(seq, "VID_RND "); + + if (pkt_dev->flags & F_SVID_RND) + seq_printf(seq, "SVID_RND "); + seq_puts(seq, "\n"); sa = pkt_dev->started_at; @@ -715,12 +760,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) } -static int hex32_arg(const char __user *user_buffer, __u32 *num) +static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32 *num) { int i = 0; *num = 0; - for(; i < 8; i++) { + for(; i < maxlen; i++) { char c; *num <<= 4; if (get_user(c, &user_buffer[i])) @@ -815,7 +860,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) pkt_dev->nr_labels = 0; do { __u32 tmp; - len = hex32_arg(&buffer[i], &tmp); + len = hex32_arg(&buffer[i], 8, &tmp); if (len <= 0) return len; pkt_dev->labels[n] = htonl(tmp); @@ -1140,11 +1185,27 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!MPLS_RND") == 0) pkt_dev->flags &= ~F_MPLS_RND; + else if (strcmp(f, "VID_RND") == 0) + pkt_dev->flags |= F_VID_RND; + + else if (strcmp(f, "!VID_RND") == 0) + pkt_dev->flags &= ~F_VID_RND; + + else if (strcmp(f, "SVID_RND") == 0) + pkt_dev->flags |= F_SVID_RND; + + else if (strcmp(f, "!SVID_RND") == 0) + pkt_dev->flags &= ~F_SVID_RND; + + else if (strcmp(f, "!IPV6") == 0) + pkt_dev->flags &= ~F_IPV6; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, - "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n"); + "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); @@ -1445,6 +1506,160 @@ static ssize_t pktgen_if_write(struct file *file, offset += sprintf(pg_result + offset, "%08x%s", ntohl(pkt_dev->labels[n]), n == pkt_dev->nr_labels-1 ? "" : ","); + + if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) { + pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ + pkt_dev->svlan_id = 0xffff; + + if (debug) + printk("pktgen: VLAN/SVLAN auto turned off\n"); + } + return count; + } + + if (!strcmp(name, "vlan_id")) { + len = num_arg(&user_buffer[i], 4, &value); + if (len < 0) { + return len; + } + i += len; + if (value <= 4095) { + pkt_dev->vlan_id = value; /* turn on VLAN */ + + if (debug) + printk("pktgen: VLAN turned on\n"); + + if (debug && pkt_dev->nr_labels) + printk("pktgen: MPLS auto turned off\n"); + + pkt_dev->nr_labels = 0; /* turn off MPLS */ + sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id); + } else { + pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ + pkt_dev->svlan_id = 0xffff; + + if (debug) + printk("pktgen: VLAN/SVLAN turned off\n"); + } + return count; + } + + if (!strcmp(name, "vlan_p")) { + len = num_arg(&user_buffer[i], 1, &value); + if (len < 0) { + return len; + } + i += len; + if ((value <= 7) && (pkt_dev->vlan_id != 0xffff)) { + pkt_dev->vlan_p = value; + sprintf(pg_result, "OK: vlan_p=%u", pkt_dev->vlan_p); + } else { + sprintf(pg_result, "ERROR: vlan_p must be 0-7"); + } + return count; + } + + if (!strcmp(name, "vlan_cfi")) { + len = num_arg(&user_buffer[i], 1, &value); + if (len < 0) { + return len; + } + i += len; + if ((value <= 1) && (pkt_dev->vlan_id != 0xffff)) { + pkt_dev->vlan_cfi = value; + sprintf(pg_result, "OK: vlan_cfi=%u", pkt_dev->vlan_cfi); + } else { + sprintf(pg_result, "ERROR: vlan_cfi must be 0-1"); + } + return count; + } + + if (!strcmp(name, "svlan_id")) { + len = num_arg(&user_buffer[i], 4, &value); + if (len < 0) { + return len; + } + i += len; + if ((value <= 4095) && ((pkt_dev->vlan_id != 0xffff))) { + pkt_dev->svlan_id = value; /* turn on SVLAN */ + + if (debug) + printk("pktgen: SVLAN turned on\n"); + + if (debug && pkt_dev->nr_labels) + printk("pktgen: MPLS auto turned off\n"); + + pkt_dev->nr_labels = 0; /* turn off MPLS */ + sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id); + } else { + pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */ + pkt_dev->svlan_id = 0xffff; + + if (debug) + printk("pktgen: VLAN/SVLAN turned off\n"); + } + return count; + } + + if (!strcmp(name, "svlan_p")) { + len = num_arg(&user_buffer[i], 1, &value); + if (len < 0) { + return len; + } + i += len; + if ((value <= 7) && (pkt_dev->svlan_id != 0xffff)) { + pkt_dev->svlan_p = value; + sprintf(pg_result, "OK: svlan_p=%u", pkt_dev->svlan_p); + } else { + sprintf(pg_result, "ERROR: svlan_p must be 0-7"); + } + return count; + } + + if (!strcmp(name, "svlan_cfi")) { + len = num_arg(&user_buffer[i], 1, &value); + if (len < 0) { + return len; + } + i += len; + if ((value <= 1) && (pkt_dev->svlan_id != 0xffff)) { + pkt_dev->svlan_cfi = value; + sprintf(pg_result, "OK: svlan_cfi=%u", pkt_dev->svlan_cfi); + } else { + sprintf(pg_result, "ERROR: svlan_cfi must be 0-1"); + } + return count; + } + + if (!strcmp(name, "tos")) { + __u32 tmp_value = 0; + len = hex32_arg(&user_buffer[i], 2, &tmp_value); + if (len < 0) { + return len; + } + i += len; + if (len == 2) { + pkt_dev->tos = tmp_value; + sprintf(pg_result, "OK: tos=0x%02x", pkt_dev->tos); + } else { + sprintf(pg_result, "ERROR: tos must be 00-ff"); + } + return count; + } + + if (!strcmp(name, "traffic_class")) { + __u32 tmp_value = 0; + len = hex32_arg(&user_buffer[i], 2, &tmp_value); + if (len < 0) { + return len; + } + i += len; + if (len == 2) { + pkt_dev->traffic_class = tmp_value; + sprintf(pg_result, "OK: traffic_class=0x%02x", pkt_dev->traffic_class); + } else { + sprintf(pg_result, "ERROR: traffic_class must be 00-ff"); + } return count; } @@ -1786,7 +2001,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) * use ipv6_get_lladdr if/when it's get exported */ - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1805,7 +2020,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); if (err) printk("pktgen: ERROR: IPv6 link address not availble.\n"); } @@ -1949,6 +2164,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) htonl(0x000fffff)); } + if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { + pkt_dev->vlan_id = pktgen_random() % 4096; + } + + if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { + pkt_dev->svlan_id = pktgen_random() % 4096; + } + if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { if (pkt_dev->flags & F_UDPSRC_RND) pkt_dev->cur_udp_src = @@ -2092,10 +2315,18 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_hdr *pgh = NULL; __be16 protocol = __constant_htons(ETH_P_IP); __be32 *mpls; + __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ + __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ + __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ + __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ + if (pkt_dev->nr_labels) protocol = __constant_htons(ETH_P_MPLS_UC); + if (pkt_dev->vlan_id != 0xffff) + protocol = __constant_htons(ETH_P_8021Q); + /* Update any of the values, used when we're incrementing various * fields. */ @@ -2103,7 +2334,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + - pkt_dev->nr_labels*sizeof(u32), GFP_ATOMIC); + pkt_dev->nr_labels*sizeof(u32) + + VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), + GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2116,6 +2349,24 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); + + if (pkt_dev->vlan_id != 0xffff) { + if(pkt_dev->svlan_id != 0xffff) { + svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + *svlan_tci = htons(pkt_dev->svlan_id); + *svlan_tci |= pkt_dev->svlan_p << 5; + *svlan_tci |= pkt_dev->svlan_cfi << 4; + svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); + } + vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + *vlan_tci = htons(pkt_dev->vlan_id); + *vlan_tci |= pkt_dev->vlan_p << 5; + *vlan_tci |= pkt_dev->vlan_cfi << 4; + vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + *vlan_encapsulated_proto = __constant_htons(ETH_P_IP); + } + iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); @@ -2124,7 +2375,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - - pkt_dev->nr_labels*sizeof(u32); + pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2136,7 +2387,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->ihl = 5; iph->version = 4; iph->ttl = 32; - iph->tos = 0; + iph->tos = pkt_dev->tos; iph->protocol = IPPROTO_UDP; /* UDP */ iph->saddr = pkt_dev->cur_saddr; iph->daddr = pkt_dev->cur_daddr; @@ -2146,9 +2397,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32); + skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) - + VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.iph = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); @@ -2216,7 +2470,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } - pkt_dev->seq_num++; return skb; } @@ -2400,17 +2653,26 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct pktgen_hdr *pgh = NULL; __be16 protocol = __constant_htons(ETH_P_IPV6); __be32 *mpls; + __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ + __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ + __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ + __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ if (pkt_dev->nr_labels) protocol = __constant_htons(ETH_P_MPLS_UC); + if (pkt_dev->vlan_id != 0xffff) + protocol = __constant_htons(ETH_P_8021Q); + /* Update any of the values, used when we're incrementing various * fields. */ mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + - pkt_dev->nr_labels*sizeof(u32), GFP_ATOMIC); + pkt_dev->nr_labels*sizeof(u32) + + VLAN_TAG_SIZE(pkt_dev) + SVLAN_TAG_SIZE(pkt_dev), + GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -2423,16 +2685,34 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mpls = (__be32 *)skb_put(skb, pkt_dev->nr_labels*sizeof(__u32)); if (pkt_dev->nr_labels) mpls_push(mpls, pkt_dev); + + if (pkt_dev->vlan_id != 0xffff) { + if(pkt_dev->svlan_id != 0xffff) { + svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + *svlan_tci = htons(pkt_dev->svlan_id); + *svlan_tci |= pkt_dev->svlan_p << 5; + *svlan_tci |= pkt_dev->svlan_cfi << 4; + svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); + } + vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); + *vlan_tci = htons(pkt_dev->vlan_id); + *vlan_tci |= pkt_dev->vlan_p << 5; + *vlan_tci |= pkt_dev->vlan_cfi << 4; + vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); + *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6); + } + iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = __constant_htons(ETH_P_IPV6); + *(u16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - - pkt_dev->nr_labels*sizeof(u32); + pkt_dev->nr_labels*sizeof(u32) - VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); @@ -2448,6 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ + if (pkt_dev->traffic_class) { + /* Version + traffic class + flow (0) */ + *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); + } + iph->hop_limit = 32; iph->payload_len = htons(sizeof(struct udphdr) + datalen); @@ -2456,10 +2741,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); - skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32); + skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) - + VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.ipv6h = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); @@ -2527,7 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } - pkt_dev->seq_num++; + /* pkt_dev->seq_num++; FF: you really mean this? */ return skb; } @@ -3173,6 +3461,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->udp_dst_min = 9; pkt_dev->udp_dst_max = 9; + pkt_dev->vlan_p = 0; + pkt_dev->vlan_cfi = 0; + pkt_dev->vlan_id = 0xffff; + pkt_dev->svlan_p = 0; + pkt_dev->svlan_cfi = 0; + pkt_dev->svlan_id = 0xffff; + strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); if (!pktgen_setup_dev(pkt_dev)) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3fcfa9c59e1f..221e4038216b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -16,7 +16,6 @@ * Vitaly E. Lavrov RTA_OK arithmetics was wrong. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/types.h> @@ -36,6 +35,7 @@ #include <linux/init.h> #include <linux/security.h> #include <linux/mutex.h> +#include <linux/if_addr.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -50,6 +50,7 @@ #include <net/udp.h> #include <net/sock.h> #include <net/pkt_sched.h> +#include <net/fib_rules.h> #include <net/netlink.h> #ifdef CONFIG_NET_WIRELESS_RTNETLINK #include <linux/wireless.h> @@ -57,6 +58,7 @@ #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ static DEFINE_MUTEX(rtnl_mutex); +static struct sock *rtnl; void rtnl_lock(void) { @@ -94,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } -struct sock *rtnl; - struct rtnetlink_link * rtnetlink_links[NPROTO]; static const int rtm_min[RTM_NR_FAMILIES] = @@ -103,8 +103,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -112,7 +111,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)), }; static const int rta_max[RTM_NR_FAMILIES] = @@ -120,13 +118,11 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, - [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, + [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, - [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX, }; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) @@ -169,24 +165,52 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } +int rtnl_unicast(struct sk_buff *skb, u32 pid) +{ + return nlmsg_unicast(rtnl, skb, pid); +} + +int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + return nlmsg_notify(rtnl, skb, pid, group, report, flags); +} + +void rtnl_set_sk_err(u32 group, int error) +{ + netlink_set_err(rtnl, 0, group, error); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { - struct rtattr *mx = (struct rtattr*)skb->tail; - int i; + struct nlattr *mx; + int i, valid = 0; + + mx = nla_nest_start(skb, RTA_METRICS); + if (mx == NULL) + return -ENOBUFS; + + for (i = 0; i < RTAX_MAX; i++) { + if (metrics[i]) { + valid++; + NLA_PUT_U32(skb, i+1, metrics[i]); + } + } - RTA_PUT(skb, RTA_METRICS, 0, NULL); - for (i=0; i<RTAX_MAX; i++) { - if (metrics[i]) - RTA_PUT(skb, i+1, sizeof(u32), metrics+i); + if (!valid) { + nla_nest_cancel(skb, mx); + return 0; } - mx->rta_len = skb->tail - (u8*)mx; - if (mx->rta_len == RTA_LENGTH(0)) - skb_trim(skb, (u8*)mx - skb->data); - return 0; -rtattr_failure: - skb_trim(skb, (u8*)mx - skb->data); - return -1; + return nla_nest_end(skb, mx); + +nla_put_failure: + return nla_nest_cancel(skb, mx); } @@ -217,41 +241,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } -static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - int type, u32 pid, u32 seq, u32 change, - unsigned int flags) +static void copy_rtnl_link_stats(struct rtnl_link_stats *a, + struct net_device_stats *b) { - struct ifinfomsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = change; - - RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); - - if (1) { - u32 txqlen = dev->tx_queue_len; - RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); - } + a->rx_packets = b->rx_packets; + a->tx_packets = b->tx_packets; + a->rx_bytes = b->rx_bytes; + a->tx_bytes = b->tx_bytes; + a->rx_errors = b->rx_errors; + a->tx_errors = b->tx_errors; + a->rx_dropped = b->rx_dropped; + a->tx_dropped = b->tx_dropped; + + a->multicast = b->multicast; + a->collisions = b->collisions; + + a->rx_length_errors = b->rx_length_errors; + a->rx_over_errors = b->rx_over_errors; + a->rx_crc_errors = b->rx_crc_errors; + a->rx_frame_errors = b->rx_frame_errors; + a->rx_fifo_errors = b->rx_fifo_errors; + a->rx_missed_errors = b->rx_missed_errors; + + a->tx_aborted_errors = b->tx_aborted_errors; + a->tx_carrier_errors = b->tx_carrier_errors; + a->tx_fifo_errors = b->tx_fifo_errors; + a->tx_heartbeat_errors = b->tx_heartbeat_errors; + a->tx_window_errors = b->tx_window_errors; + + a->rx_compressed = b->rx_compressed; + a->tx_compressed = b->tx_compressed; +}; - if (1) { - u32 weight = dev->weight; - RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); - } +static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + void *iwbuf, int iwbuflen, int type, u32 pid, + u32 seq, u32 change, unsigned int flags) +{ + struct ifinfomsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_UNSPEC; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = change; + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); + NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); + NLA_PUT_U8(skb, IFLA_OPERSTATE, + netif_running(dev) ? dev->operstate : IF_OPER_DOWN); + NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + + if (dev->ifindex != dev->iflink) + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); + + if (dev->master) + NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (1) { - u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN; - u8 link_mode = dev->link_mode; - RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); - RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode); - } + if (dev->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { @@ -262,58 +318,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, .dma = dev->dma, .port = dev->if_port, }; - RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); + NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); } if (dev->addr_len) { - RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); - } - - if (1) { - u32 mtu = dev->mtu; - RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); - } - - if (dev->ifindex != dev->iflink) { - u32 iflink = dev->iflink; - RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink); - } - - if (dev->qdisc_sleeping) - RTA_PUT(skb, IFLA_QDISC, - strlen(dev->qdisc_sleeping->ops->id) + 1, - dev->qdisc_sleeping->ops->id); - - if (dev->master) { - u32 master = dev->master->ifindex; - RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master); + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } if (dev->get_stats) { - unsigned long *stats = (unsigned long*)dev->get_stats(dev); + struct net_device_stats *stats = dev->get_stats(dev); if (stats) { - struct rtattr *a; - __u32 *s; - int i; - int n = sizeof(struct rtnl_link_stats)/4; - - a = __RTA_PUT(skb, IFLA_STATS, n*4); - s = RTA_DATA(a); - for (i=0; i<n; i++) - s[i] = stats[i]; + struct nlattr *attr; + + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; + + copy_rtnl_link_stats(nla_data(attr), stats); } } - nlh->nlmsg_len = skb->tail - b; - return skb->len; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (iwbuf) + NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->args[0]; @@ -323,10 +359,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; - if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI) <= 0) + if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; } read_unlock(&dev_base_lock); @@ -335,52 +370,70 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c return skb->len; } -static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { + [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, + [IFLA_MTU] = { .type = NLA_U32 }, + [IFLA_TXQLEN] = { .type = NLA_U32 }, + [IFLA_WEIGHT] = { .type = NLA_U32 }, + [IFLA_OPERSTATE] = { .type = NLA_U8 }, + [IFLA_LINKMODE] = { .type = NLA_U8 }, +}; + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ifinfomsg *ifm = NLMSG_DATA(nlh); - struct rtattr **ida = arg; + struct ifinfomsg *ifm; struct net_device *dev; - int err, send_addr_notify = 0; + int err, send_addr_notify = 0, modified = 0; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; + + err = -EINVAL; + ifm = nlmsg_data(nlh); if (ifm->ifi_index >= 0) dev = dev_get_by_index(ifm->ifi_index); - else if (ida[IFLA_IFNAME - 1]) { - char ifname[IFNAMSIZ]; - - if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], - IFNAMSIZ) >= IFNAMSIZ) - return -EINVAL; + else if (tb[IFLA_IFNAME]) dev = dev_get_by_name(ifname); - } else - return -EINVAL; + else + goto errout; - if (!dev) - return -ENODEV; + if (dev == NULL) { + err = -ENODEV; + goto errout; + } - err = -EINVAL; + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; - if (ifm->ifi_flags) - dev_change_flags(dev, ifm->ifi_flags); + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; - if (ida[IFLA_MAP - 1]) { + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; if (!dev->set_config) { err = -EOPNOTSUPP; - goto out; + goto errout_dev; } if (!netif_device_present(dev)) { err = -ENODEV; - goto out; + goto errout_dev; } - - if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map))) - goto out; - - u_map = RTA_DATA(ida[IFLA_MAP - 1]); + u_map = nla_data(tb[IFLA_MAP]); k_map.mem_start = (unsigned long) u_map->mem_start; k_map.mem_end = (unsigned long) u_map->mem_end; k_map.base_addr = (unsigned short) u_map->base_addr; @@ -389,187 +442,175 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) k_map.port = (unsigned char) u_map->port; err = dev->set_config(dev, &k_map); + if (err < 0) + goto errout_dev; - if (err) - goto out; + modified = 1; } - if (ida[IFLA_ADDRESS - 1]) { + if (tb[IFLA_ADDRESS]) { + struct sockaddr *sa; + int len; + if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto out; + goto errout_dev; } + if (!netif_device_present(dev)) { err = -ENODEV; - goto out; + goto errout_dev; } - if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) - goto out; - err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1])); + len = sizeof(sa_family_t) + dev->addr_len; + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; + goto errout_dev; + } + sa->sa_family = dev->type; + memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), + dev->addr_len); + err = dev->set_mac_address(dev, sa); + kfree(sa); if (err) - goto out; + goto errout_dev; send_addr_notify = 1; + modified = 1; } - if (ida[IFLA_BROADCAST - 1]) { - if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) - goto out; - memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), - dev->addr_len); - send_addr_notify = 1; + if (tb[IFLA_MTU]) { + err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + if (err < 0) + goto errout_dev; + modified = 1; } - if (ida[IFLA_MTU - 1]) { - if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; - err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); - - if (err) - goto out; - + /* + * Interface selected by interface index but interface + * name provided implies that a name change has been + * requested. + */ + if (ifm->ifi_index >= 0 && ifname[0]) { + err = dev_change_name(dev, ifname); + if (err < 0) + goto errout_dev; + modified = 1; } - if (ida[IFLA_TXQLEN - 1]) { - if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; +#ifdef CONFIG_NET_WIRELESS_RTNETLINK + if (tb[IFLA_WIRELESS]) { + /* Call Wireless Extensions. + * Various stuff checked in there... */ + err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]), + nla_len(tb[IFLA_WIRELESS])); + if (err < 0) + goto errout_dev; + } +#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); + if (tb[IFLA_BROADCAST]) { + nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); + send_addr_notify = 1; } - if (ida[IFLA_WEIGHT - 1]) { - if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; - dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); - } + if (ifm->ifi_flags) + dev_change_flags(dev, ifm->ifi_flags); - if (ida[IFLA_OPERSTATE - 1]) { - if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) - goto out; + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); - } + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (ida[IFLA_LINKMODE - 1]) { - if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) - goto out; + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) { write_lock_bh(&dev_base_lock); - dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); write_unlock_bh(&dev_base_lock); } - if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { - char ifname[IFNAMSIZ]; - - if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], - IFNAMSIZ) >= IFNAMSIZ) - goto out; - err = dev_change_name(dev, ifname); - if (err) - goto out; - } - -#ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (ida[IFLA_WIRELESS - 1]) { - - /* Call Wireless Extensions. - * Various stuff checked in there... */ - err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len); - if (err) - goto out; - } -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - err = 0; -out: +errout_dev: + if (err < 0 && modified && net_ratelimit()) + printk(KERN_WARNING "A link change request failed with " + "some changes comitted already. Interface %s may " + "have been left with an inconsistent configuration, " + "please check.\n", dev->name); + if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); dev_put(dev); +errout: return err; } -#ifdef CONFIG_NET_WIRELESS_RTNETLINK -static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct ifinfomsg *ifm = NLMSG_DATA(in_nlh); - struct rtattr **ida = arg; - struct net_device *dev; - struct ifinfomsg *r; - struct nlmsghdr *nlh; - int err = -ENOBUFS; - struct sk_buff *skb; - unsigned char *b; - char *iw_buf = NULL; + struct ifinfomsg *ifm; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *dev = NULL; + struct sk_buff *nskb; + char *iw_buf = NULL, *iw = NULL; int iw_buf_len = 0; + int err, payload; - if (ifm->ifi_index >= 0) + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index >= 0) { dev = dev_get_by_index(ifm->ifi_index); - else + if (dev == NULL) + return -ENODEV; + } else return -EINVAL; - if (!dev) - return -ENODEV; -#ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (ida[IFLA_WIRELESS - 1]) { +#ifdef CONFIG_NET_WIRELESS_RTNETLINK + if (tb[IFLA_WIRELESS]) { /* Call Wireless Extensions. We need to know the size before * we can alloc. Various stuff checked in there... */ - err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len); - if (err) - goto out; + err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]), + nla_len(tb[IFLA_WIRELESS]), + &iw_buf, &iw_buf_len); + if (err < 0) + goto errout; + + iw += IW_EV_POINT_OFF; } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - /* Create a skb big enough to include all the data. - * Some requests are way bigger than 4k... Jean II */ - skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)), - GFP_KERNEL); - if (!skb) - goto out; - b = skb->tail; - - /* Put in the message the usual good stuff */ - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq, - RTM_NEWLINK, sizeof(*r)); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev->flags; - r->ifi_change = 0; - - /* Put the wireless payload if it exist */ - if(iw_buf != NULL) - RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len, - iw_buf + IW_EV_POINT_OFF); - - nlh->nlmsg_len = skb->tail - b; - - /* Needed ? */ - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: - if(iw_buf != NULL) - kfree(iw_buf); + payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + + nla_total_size(iw_buf_len)); + nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + if (nskb == NULL) { + err = -ENOBUFS; + goto errout; + } + + err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, + NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); + if (err <= 0) { + kfree_skb(nskb); + goto errout; + } + + err = rtnl_unicast(skb, NETLINK_CB(skb).pid); +errout: + kfree(iw_buf); dev_put(dev); - return err; -rtattr_failure: -nlmsg_failure: - kfree_skb(skb); - goto out; + return err; } -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ -static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->family; @@ -596,20 +637,22 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + - sizeof(struct rtnl_link_ifmap) + - sizeof(struct rtnl_link_stats) + 128); + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { + err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); + + err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. */ @@ -663,7 +706,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) sz_idx = type>>2; kind = type&3; - if (kind != 2 && security_netlink_recv(skb)) { + if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) { *errp = -EPERM; return -1; } @@ -734,18 +777,19 @@ static void rtnetlink_rcv(struct sock *sk, int len) static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = { - [RTM_GETLINK - RTM_BASE] = { -#ifdef CONFIG_NET_WIRELESS_RTNETLINK - .doit = do_getlink, -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - .dumpit = rtnetlink_dump_ifinfo }, - [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, - [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, + [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo }, + [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all }, + [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all }, [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, - [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, +#ifdef CONFIG_FIB_RULES + [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, + [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, +#endif + [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, }; @@ -805,7 +849,9 @@ EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); EXPORT_SYMBOL(rtnetlink_links); EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); +EXPORT_SYMBOL(rtnl_unicast); +EXPORT_SYMBOL(rtnl_notify); +EXPORT_SYMBOL(rtnl_set_sk_err); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e5044ba3ab6..c448c7f6fde2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -38,7 +38,6 @@ * The functions in this file will not compile correctly with gcc 2.4.x */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -250,12 +249,37 @@ nodata: goto out; } +/** + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) +{ + struct sk_buff *skb; -static void skb_drop_fraglist(struct sk_buff *skb) + skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + return skb; +} + +static void skb_drop_list(struct sk_buff **listp) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + struct sk_buff *list = *listp; - skb_shinfo(skb)->frag_list = NULL; + *listp = NULL; do { struct sk_buff *this = list; @@ -264,6 +288,11 @@ static void skb_drop_fraglist(struct sk_buff *skb) } while (list); } +static inline void skb_drop_fraglist(struct sk_buff *skb) +{ + skb_drop_list(&skb_shinfo(skb)->frag_list); +} + static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; @@ -272,7 +301,7 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -void skb_release_data(struct sk_buff *skb) +static void skb_release_data(struct sk_buff *skb) { if (!skb->cloned || !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, @@ -824,41 +853,81 @@ free_skb: int ___pskb_trim(struct sk_buff *skb, unsigned int len) { + struct sk_buff **fragp; + struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; + int err; - for (i = 0; i < nfrags; i++) { + if (skb_cloned(skb) && + unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) + return err; + + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len - offset; - } + + if (end < len) { + offset = end; + continue; } - offset = end; + + skb_shinfo(skb)->frags[i++].size = len - offset; + +drop_pages: + skb_shinfo(skb)->nr_frags = i; + + for (; i < nfrags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + goto done; + } + + for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); + fragp = &frag->next) { + int end = offset + frag->len; + + if (skb_shared(frag)) { + struct sk_buff *nfrag; + + nfrag = skb_clone(frag, GFP_ATOMIC); + if (unlikely(!nfrag)) + return -ENOMEM; + + nfrag->next = frag->next; + kfree_skb(frag); + frag = nfrag; + *fragp = frag; + } + + if (end < len) { + offset = end; + continue; + } + + if (end > len && + unlikely((err = pskb_trim(frag, len - offset)))) + return err; + + if (frag->next) + skb_drop_list(&frag->next); + break; } - if (offset < len) { +done: + if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } + skb->len = len; + skb->data_len = 0; + skb->tail = skb->data + len; } return 0; @@ -1328,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) unsigned int csum; long csstart; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb->h.raw - skb->data; else csstart = skb_headlen(skb); @@ -1342,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart, 0); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum; *((unsigned short *)(to + csstuff)) = csum_fold(csum); @@ -1739,12 +1808,15 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state) { + unsigned int ret; + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); - return textsearch_find(config, state); + ret = textsearch_find(config, state); + return (ret <= to - from ? ret : UINT_MAX); } /** @@ -1826,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, * @len: length of data pulled * * This function performs an skb_pull on the packet and updates - * update the CHECKSUM_HW checksum. It should be used on receive - * path processing instead of skb_pull unless you know that the - * checksum difference is zero (e.g., a valid IP header) or you - * are setting ip_summed to CHECKSUM_NONE. + * update the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_pull unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { @@ -1845,13 +1917,13 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. * @skb: buffer to segment - * @sg: whether scatter-gather can be used for generated segments + * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * the segment at the given position. It returns NULL if there are * no more segments to generate, or when an error is encountered. */ -struct sk_buff *skb_segment(struct sk_buff *skb, int sg) +struct sk_buff *skb_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -1860,6 +1932,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int sg) unsigned int offset = doffset; unsigned int headroom; unsigned int len; + int sg = features & NETIF_F_SG; int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; @@ -1921,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int sg) frag = skb_shinfo(nskb)->frags; k = 0; - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum = skb->csum; memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); @@ -1973,19 +2046,14 @@ void __init skb_init(void) skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_fclone_cache) - panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); @@ -1993,6 +2061,7 @@ EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); EXPORT_SYMBOL(__alloc_skb); +EXPORT_SYMBOL(__netdev_alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); diff --git a/net/core/sock.c b/net/core/sock.c index 5d820c376653..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -92,7 +92,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -130,6 +129,53 @@ #include <net/tcp.h> #endif +/* + * Each address family might have different locking rules, so we have + * one slock key per address family: + */ +static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_slock_keys[AF_MAX]; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * Make lock validator output more readable. (we pre-construct these + * strings build-time, so that runtime initialization of socket + * locks is fast): + */ +static const char *af_family_key_strings[AF_MAX+1] = { + "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , + "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", + "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , + "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , + "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , + "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , + "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , + "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , + "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , + "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , + "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX" +}; +static const char *af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , + "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", + "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , + "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , + "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , + "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , + "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , + "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , + "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , + "slock-27" , "slock-28" , "slock-29" , + "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX" +}; +#endif + +/* + * sk_callback_lock locking rules are per-address-family, + * so split the lock classes by using a per-AF key: + */ +static struct lock_class_key af_callback_keys[AF_MAX]; + /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across * platforms. This makes socket queueing behavior and performance @@ -141,13 +187,13 @@ #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) /* Run time adjustable parameters. */ -__u32 sysctl_wmem_max = SK_WMEM_MAX; -__u32 sysctl_rmem_max = SK_RMEM_MAX; -__u32 sysctl_wmem_default = SK_WMEM_MAX; -__u32 sysctl_rmem_default = SK_RMEM_MAX; +__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; +__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; /* Maximal space eaten by iovec or ancilliary data plus some space */ -int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); +int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -201,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - /* It would be deadlock, if sock_queue_rcv_skb is used - with socket lock! We assume that users of this - function are lock free. - */ - err = sk_filter(sk, skb, 1); + err = sk_filter(sk, skb); if (err) goto out; @@ -232,15 +274,22 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { + /* + * trylock + unlock semantics: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); + rc = sk->sk_backlog_rcv(sk, skb); - else + + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + } else sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: @@ -553,18 +602,25 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - spin_lock_bh(&sk->sk_lock.slock); - filter = sk->sk_filter; + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); if (filter) { - sk->sk_filter = NULL; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_release(sk, filter); + rcu_read_unlock_bh(); break; } - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_unlock_bh(); ret = -ENONET; break; + case SO_PASSSEC: + if (valbool) + set_bit(SOCK_PASSSEC, &sock->flags); + else + clear_bit(SOCK_PASSSEC, &sock->flags); + break; + /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: @@ -723,6 +779,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_state == TCP_LISTEN; break; + case SO_PASSSEC: + v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; + break; + case SO_PEERSEC: return security_socket_getpeersec_stream(sock, optval, optlen, len); @@ -739,6 +799,33 @@ lenout: return 0; } +/* + * Initialize an sk_lock. + * + * (We also register the sk_lock with the lock validator.) + */ +static void inline sock_lock_init(struct sock *sk) +{ + spin_lock_init(&sk->sk_lock.slock); + sk->sk_lock.owner = NULL; + init_waitqueue_head(&sk->sk_lock.wq); + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); + + /* + * Mark both the sk_lock and the sk_lock.slock as a + * per-address-family lock class: + */ + lockdep_set_class_and_name(&sk->sk_lock.slock, + af_family_slock_keys + sk->sk_family, + af_family_slock_key_strings[sk->sk_family]); + lockdep_init_map(&sk->sk_lock.dep_map, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); +} + /** * sk_alloc - All socket objects are allocated here * @family: protocol family @@ -793,10 +880,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { sk_filter_release(sk, filter); - sk->sk_filter = NULL; + rcu_assign_pointer(sk->sk_filter, NULL); } sock_disable_timestamp(sk); @@ -820,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) if (newsk != NULL) { struct sk_filter *filter; - memcpy(newsk, sk, sk->sk_prot->obj_size); + sock_copy(newsk, sk); /* SANITY */ sk_node_init(&newsk->sk_node); @@ -838,6 +925,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); + lockdep_set_class(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1412,6 +1501,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); + lockdep_set_class(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1439,24 +1530,34 @@ void sock_init_data(struct socket *sock, struct sock *sk) void fastcall lock_sock(struct sock *sk) { might_sleep(); - spin_lock_bh(&(sk->sk_lock.slock)); + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owner) __lock_sock(sk); sk->sk_lock.owner = (void *)1; - spin_unlock_bh(&(sk->sk_lock.slock)); + spin_unlock(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + local_bh_enable(); } EXPORT_SYMBOL(lock_sock); void fastcall release_sock(struct sock *sk) { - spin_lock_bh(&(sk->sk_lock.slock)); + /* + * The sk_lock has mutex_unlock() semantics: + */ + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&(sk->sk_lock.wq))) - wake_up(&(sk->sk_lock.wq)); - spin_unlock_bh(&(sk->sk_lock.slock)); + if (waitqueue_active(&sk->sk_lock.wq)) + wake_up(&sk->sk_lock.wq); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/core/stream.c b/net/core/stream.c index e9489696f694..d1d7decf70b0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -196,15 +196,13 @@ EXPORT_SYMBOL(sk_stream_error); void __sk_stream_mem_reclaim(struct sock *sk) { - if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) { - atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, - sk->sk_prot->memory_allocated); - sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; - if (*sk->sk_prot->memory_pressure && - (atomic_read(sk->sk_prot->memory_allocated) < - sk->sk_prot->sysctl_mem[0])) - *sk->sk_prot->memory_pressure = 0; - } + atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, + sk->sk_prot->memory_allocated); + sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; + if (*sk->sk_prot->memory_pressure && + (atomic_read(sk->sk_prot->memory_allocated) < + sk->sk_prot->sysctl_mem[0])) + *sk->sk_prot->memory_pressure = 0; } EXPORT_SYMBOL(__sk_stream_mem_reclaim); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 710453656721..02534131d88e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/socket.h> #include <net/sock.h> diff --git a/net/core/user_dma.c b/net/core/user_dma.c index b7c98dbcdb81..248a6b666aff 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -29,6 +29,7 @@ #include <linux/socket.h> #include <linux/rtnetlink.h> /* for BUG_TRAP */ #include <net/tcp.h> +#include <net/netdma.h> #define NET_DMA_DEFAULT_COPYBREAK 4096 diff --git a/net/core/utils.c b/net/core/utils.c index 4f96f389243d..94c5d761c830 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -3,7 +3,8 @@ * * Authors: * net_random Alan Cox - * net_ratelimit Andy Kleen + * net_ratelimit Andi Kleen + * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * @@ -130,12 +131,13 @@ void __init net_random_init(void) static int net_random_reseed(void) { int i; - unsigned long seed[NR_CPUS]; + unsigned long seed; - get_random_bytes(seed, sizeof(seed)); for_each_possible_cpu(i) { struct nrnd_state *state = &per_cpu(net_rand_state,i); - __net_srandom(state, seed[i]); + + get_random_bytes(&seed, sizeof(seed)); + __net_srandom(state, seed); } return 0; } @@ -190,3 +192,215 @@ __be32 in_aton(const char *str) } EXPORT_SYMBOL(in_aton); + +#define IN6PTON_XDIGIT 0x00010000 +#define IN6PTON_DIGIT 0x00020000 +#define IN6PTON_COLON_MASK 0x00700000 +#define IN6PTON_COLON_1 0x00100000 /* single : requested */ +#define IN6PTON_COLON_2 0x00200000 /* second : requested */ +#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ +#define IN6PTON_DOT 0x00800000 /* . */ +#define IN6PTON_DELIM 0x10000000 +#define IN6PTON_NULL 0x20000000 /* first/tail */ +#define IN6PTON_UNKNOWN 0x40000000 + +static inline int digit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_DIGIT | (c - '0')); + return IN6PTON_UNKNOWN; +} + +static inline int xdigit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == ':') + return IN6PTON_COLON_MASK; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); + if (c >= 'a' && c <= 'f') + return (IN6PTON_XDIGIT | (c - 'a' + 10)); + if (c >= 'A' && c <= 'F') + return (IN6PTON_XDIGIT | (c - 'A' + 10)); + return IN6PTON_UNKNOWN; +} + +int in4_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s; + u8 *d; + u8 dbuf[4]; + int ret = 0; + int i; + int w = 0; + + if (srclen < 0) + srclen = strlen(src); + s = src; + d = dbuf; + i = 0; + while(1) { + int c; + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { + goto out; + } + if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { + if (w == 0) + goto out; + *d++ = w & 0xff; + w = 0; + i++; + if (c & IN6PTON_DELIM) { + if (i != 4) + goto out; + break; + } + goto cont; + } + w = (w * 10) + c; + if ((w & 0xffff) > 255) { + goto out; + } +cont: + if (i >= 4) + goto out; + s++; + srclen--; + } + ret = 1; + memcpy(dst, dbuf, sizeof(dbuf)); +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in4_pton); + +int in6_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s, *tok = NULL; + u8 *d, *dc = NULL; + u8 dbuf[16]; + int ret = 0; + int i; + int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; + int w = 0; + + memset(dbuf, 0, sizeof(dbuf)); + + s = src; + d = dbuf; + if (srclen < 0) + srclen = strlen(src); + + while (1) { + int c; + + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & state)) + goto out; + if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { + /* process one 16-bit word */ + if (!(state & IN6PTON_NULL)) { + *d++ = (w >> 8) & 0xff; + *d++ = w & 0xff; + } + w = 0; + if (c & IN6PTON_DELIM) { + /* We've processed last word */ + break; + } + /* + * COLON_1 => XDIGIT + * COLON_2 => XDIGIT|DELIM + * COLON_1_2 => COLON_2 + */ + switch (state & IN6PTON_COLON_MASK) { + case IN6PTON_COLON_2: + dc = d; + state = IN6PTON_XDIGIT | IN6PTON_DELIM; + if (dc - dbuf >= sizeof(dbuf)) + state |= IN6PTON_NULL; + break; + case IN6PTON_COLON_1|IN6PTON_COLON_1_2: + state = IN6PTON_XDIGIT | IN6PTON_COLON_2; + break; + case IN6PTON_COLON_1: + state = IN6PTON_XDIGIT; + break; + case IN6PTON_COLON_1_2: + state = IN6PTON_COLON_2; + break; + default: + state = 0; + } + tok = s + 1; + goto cont; + } + + if (c & IN6PTON_DOT) { + ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); + if (ret > 0) { + d += 4; + break; + } + goto out; + } + + w = (w << 4) | (0xff & c); + state = IN6PTON_COLON_1 | IN6PTON_DELIM; + if (!(w & 0xf000)) { + state |= IN6PTON_XDIGIT; + } + if (!dc && d + 2 < dbuf + sizeof(dbuf)) { + state |= IN6PTON_COLON_1_2; + state &= ~IN6PTON_DELIM; + } + if (d + 2 >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); + } +cont: + if ((dc && d + 4 < dbuf + sizeof(dbuf)) || + d + 4 == dbuf + sizeof(dbuf)) { + state |= IN6PTON_DOT; + } + if (d >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); + } + s++; + srclen--; + } + + i = 15; d--; + + if (dc) { + while(d >= dc) + dst[i--] = *d--; + while(i >= dc - dbuf) + dst[i--] = 0; + while(i >= 0) + dst[i--] = *d--; + } else + memcpy(dst, dbuf, sizeof(dbuf)); + + ret = 1; +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in6_pton); diff --git a/net/core/wireless.c b/net/core/wireless.c index d2bc72d318f7..ffff0da46c6e 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -68,11 +68,18 @@ * * v8 - 17.02.06 - Jean II * o RtNetlink requests support (SET/GET) + * + * v8b - 03.08.06 - Herbert Xu + * o Fix Wireless Event locking issues. + * + * v9 - 14.3.06 - Jean II + * o Change length in ESSID and NICK to strlen() instead of strlen()+1 + * o Make standard_ioctl_num and standard_event_num unsigned + * o Remove (struct net_device *)->get_wireless_stats() */ /***************************** INCLUDES *****************************/ -#include <linux/config.h> /* Not needed ??? */ #include <linux/module.h> #include <linux/types.h> /* off_t */ #include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ @@ -82,9 +89,11 @@ #include <linux/init.h> /* for __init */ #include <linux/if_arp.h> /* ARPHRD_ETHER */ #include <linux/etherdevice.h> /* compare_ether_addr */ +#include <linux/interrupt.h> #include <linux/wireless.h> /* Pretty obvious */ #include <net/iw_handler.h> /* New driver API */ +#include <net/netlink.h> #include <asm/uaccess.h> /* copy_to_user() */ @@ -233,24 +242,24 @@ static const struct iw_ioctl_description standard_ioctl[] = { [SIOCSIWESSID - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_EVENT, }, [SIOCGIWESSID - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_DUMP, }, [SIOCSIWNICKN - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, }, [SIOCGIWNICKN - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, - .max_tokens = IW_ESSID_MAX_SIZE + 1, + .max_tokens = IW_ESSID_MAX_SIZE, }, [SIOCSIWRATE - SIOCIWFIRST] = { .header_type = IW_HEADER_TYPE_PARAM, @@ -337,8 +346,8 @@ static const struct iw_ioctl_description standard_ioctl[] = { .max_tokens = sizeof(struct iw_pmksa), }, }; -static const int standard_ioctl_num = (sizeof(standard_ioctl) / - sizeof(struct iw_ioctl_description)); +static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) / + sizeof(struct iw_ioctl_description)); /* * Meta-data about all the additional standard Wireless Extension events @@ -388,8 +397,8 @@ static const struct iw_ioctl_description standard_event[] = { .max_tokens = sizeof(struct iw_pmkid_cand), }, }; -static const int standard_event_num = (sizeof(standard_event) / - sizeof(struct iw_ioctl_description)); +static const unsigned standard_event_num = (sizeof(standard_event) / + sizeof(struct iw_ioctl_description)); /* Size (in bytes) of the various private data types */ static const char iw_priv_type_size[] = { @@ -464,17 +473,6 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); - /* Old location, field to be removed in next WE */ - if(dev->get_wireless_stats) { - static int printed_message; - - if (!printed_message++) - printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n", - dev->name); - - return dev->get_wireless_stats(dev); - } - /* Not found */ return (struct iw_statistics *) NULL; } @@ -1844,6 +1842,43 @@ int wireless_rtnetlink_set(struct net_device * dev, #ifdef WE_EVENT_RTNETLINK /* ---------------------------------------------------------------- */ /* + * Locking... + * ---------- + * + * Thanks to Herbert Xu <herbert@gondor.apana.org.au> for fixing + * the locking issue in here and implementing this code ! + * + * The issue : wireless_send_event() is often called in interrupt context, + * while the Netlink layer can never be called in interrupt context. + * The fully formed RtNetlink events are queued, and then a tasklet is run + * to feed those to Netlink. + * The skb_queue is interrupt safe, and its lock is not held while calling + * Netlink, so there is no possibility of dealock. + * Jean II + */ + +static struct sk_buff_head wireless_nlevent_queue; + +static int __init wireless_nlevent_init(void) +{ + skb_queue_head_init(&wireless_nlevent_queue); + return 0; +} + +subsys_initcall(wireless_nlevent_init); + +static void wireless_nlevent_process(unsigned long data) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wireless_nlevent_queue))) + rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); +} + +static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); + +/* ---------------------------------------------------------------- */ +/* * Fill a rtnetlink message with our event data. * Note that we propage only the specified event and don't dump the * current wireless config. Dumping the wireless config is far too @@ -1904,8 +1939,10 @@ static inline void rtmsg_iwinfo(struct net_device * dev, return; } NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + skb_queue_tail(&wireless_nlevent_queue, skb); + tasklet_schedule(&wireless_nlevent_tasklet); } + #endif /* WE_EVENT_RTNETLINK */ /* ---------------------------------------------------------------- */ diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 859e3359fcda..e2a095d0fd80 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -40,6 +40,22 @@ config IP_DCCP_DEBUG Just say N. +config NET_DCCPPROBE + tristate "DCCP connection probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to DCCP connection + state in response to incoming packets. It is used for debugging + DCCP congestion avoidance modules. If you don't understand + what was just said, you don't need it: say N. + + Documentation on how to use the packet generator can be found + at http://linux-net.osdl.org/index.php/DccpProbe + + To compile this code as a module, choose M here: the + module will be called dccp_probe. + + endmenu endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 7696e219b05d..17ed99c46617 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o +obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o +dccp_probe-y := probe.o obj-y += ccids/ diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 8c211c58893b..4d176d33983f 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); if (av != NULL) { - av->dccpav_buf_head = - av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; + av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; av->dccpav_ack_ptr = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; - av->dccpav_sent_len = av->dccpav_vec_len = 0; + av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -353,11 +352,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av, { struct dccp_ackvec_record *next; - av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1; - if (av->dccpav_buf_tail == 0) - av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; - - av->dccpav_vec_len -= avr->dccpavr_sent_len; + /* sort out vector length */ + if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) + av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; + else + av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 + - av->dccpav_buf_head + + avr->dccpavr_ack_ptr; /* free records */ list_for_each_entry_safe_from(avr, next, &av->dccpav_records, @@ -434,8 +435,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, break; found: if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { #ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index ec7a89bb7b39..2424effac7f6 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -11,7 +11,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/time.h> @@ -55,9 +54,7 @@ struct dccp_ackvec { struct list_head dccpav_records; struct timeval dccpav_time; u8 dccpav_buf_head; - u8 dccpav_buf_tail; u8 dccpav_ack_ptr; - u8 dccpav_sent_len; u8 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; @@ -108,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { - return av->dccpav_sent_len != av->dccpav_vec_len; + return av->dccpav_vec_len; } #else /* CONFIG_IP_DCCP_ACKVEC */ static inline int dccp_ackvec_init(void) diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index ca00191628f7..32752f750447 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -30,6 +30,14 @@ config IP_DCCP_CCID2 If in doubt, say M. +config IP_DCCP_CCID2_DEBUG + bool "CCID2 debug" + depends on IP_DCCP_CCID2 + ---help--- + Enable CCID2 debug messages. + + If in doubt, say N. + config IP_DCCP_CCID3 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" depends on IP_DCCP diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d4f9e2d33453..2efb505aeb35 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -27,18 +27,15 @@ * * BUGS: * - sequence number wrapping - * - jiffies wrapping */ -#include <linux/config.h> #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" static int ccid2_debug; -#undef CCID2_DEBUG -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG #define ccid2_pr_debug(format, a...) \ do { if (ccid2_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ @@ -47,9 +44,7 @@ static int ccid2_debug; #define ccid2_pr_debug(format, a...) #endif -static const int ccid2_seq_len = 128; - -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; @@ -72,8 +67,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) /* packets are sent sequentially */ BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); - BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); - BUG_ON(len > ccid2_seq_len); + BUG_ON(time_before(seqp->ccid2s_sent, + prev->ccid2s_sent)); seqp = prev; } @@ -85,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) do { seqp = seqp->ccid2s_prev; len++; - BUG_ON(len > ccid2_seq_len); } while (seqp != hctx->ccid2hctx_seqh); - BUG_ON(len != ccid2_seq_len); ccid2_pr_debug("total len=%d\n", len); + BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); } #else #define ccid2_hc_tx_check_sanity(hctx) do {} while (0) #endif +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, + gfp_t gfp) +{ + struct ccid2_seq *seqp; + int i; + + /* check if we have space to preserve the pointer to the buffer */ + if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / + sizeof(struct ccid2_seq*))) + return -ENOMEM; + + /* allocate buffer and initialize linked list */ + seqp = kmalloc(sizeof(*seqp) * num, gfp); + if (seqp == NULL) + return -ENOMEM; + + for (i = 0; i < (num - 1); i++) { + seqp[i].ccid2s_next = &seqp[i + 1]; + seqp[i + 1].ccid2s_prev = &seqp[i]; + } + seqp[num - 1].ccid2s_next = seqp; + seqp->ccid2s_prev = &seqp[num - 1]; + + /* This is the first allocation. Initiate the head and tail. */ + if (hctx->ccid2hctx_seqbufc == 0) + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; + else { + /* link the existing list with the one we just created */ + hctx->ccid2hctx_seqh->ccid2s_next = seqp; + seqp->ccid2s_prev = hctx->ccid2hctx_seqh; + + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; + seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; + } + + /* store the original pointer to the buffer so we can free it */ + hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; + hctx->ccid2hctx_seqbufc++; + + return 0; +} + static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { @@ -123,7 +159,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, } } - return 100; /* XXX */ + return 1; /* XXX CCID should dequeue when ready instead of polling */ } static void ccid2_change_l_ack_ratio(struct sock *sk, int val) @@ -151,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct sock *sk, int val) +static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (val == 0) val = 1; @@ -165,6 +199,17 @@ static void ccid2_change_cwnd(struct sock *sk, int val) hctx->ccid2hctx_cwnd = val; } +static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) +{ + ccid2_pr_debug("change SRTT to %ld\n", val); + hctx->ccid2hctx_srtt = val; +} + +static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val) +{ + hctx->ccid2hctx_pipe = val; +} + static void ccid2_start_rto_timer(struct sock *sk); static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -194,11 +239,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - hctx->ccid2hctx_pipe = 0; + ccid2_change_pipe(hctx, 0); hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; if (hctx->ccid2hctx_ssthresh < 2) hctx->ccid2hctx_ssthresh = 2; - ccid2_change_cwnd(sk, 1); + ccid2_change_cwnd(hctx, 1); /* clear state about stuff we sent */ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; @@ -233,13 +278,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_seq *next; u64 seq; ccid2_hc_tx_check_sanity(hctx); BUG_ON(!hctx->ccid2hctx_sendwait); hctx->ccid2hctx_sendwait = 0; - hctx->ccid2hctx_pipe++; + ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1); BUG_ON(hctx->ccid2hctx_pipe < 0); /* There is an issue. What if another packet is sent between @@ -252,15 +298,23 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid2hctx_seqh->ccid2s_seq = seq; hctx->ccid2hctx_seqh->ccid2s_acked = 0; hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next; - ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, - hctx->ccid2hctx_pipe); + next = hctx->ccid2hctx_seqh->ccid2s_next; + /* check if we need to alloc more space */ + if (next == hctx->ccid2hctx_seqt) { + int rc; + + ccid2_pr_debug("allocating more space in history\n"); + rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL); + BUG_ON(rc); /* XXX what do we do? */ - if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { - /* XXX allocate more space */ - WARN_ON(1); + next = hctx->ccid2hctx_seqh->ccid2s_next; + BUG_ON(next == hctx->ccid2hctx_seqt); } + hctx->ccid2hctx_seqh = next; + + ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, + hctx->ccid2hctx_pipe); hctx->ccid2hctx_sent++; @@ -296,7 +350,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) if (!timer_pending(&hctx->ccid2hctx_rtotimer)) ccid2_start_rto_timer(sk); -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); ccid2_pr_debug("Sent: seq=%llu\n", seq); do { @@ -399,7 +453,7 @@ static inline void ccid2_new_ack(struct sock *sk, /* increase every 2 acks */ hctx->ccid2hctx_ssacks++; if (hctx->ccid2hctx_ssacks == 2) { - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1); hctx->ccid2hctx_ssacks = 0; *maxincr = *maxincr - 1; } @@ -412,26 +466,28 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_acks++; if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1); hctx->ccid2hctx_acks = 0; } } /* update RTO */ if (hctx->ccid2hctx_srtt == -1 || - (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { - unsigned long r = jiffies - seqp->ccid2s_sent; + time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { + unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; int s; /* first measurement */ if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", r, jiffies, seqp->ccid2s_seq); - hctx->ccid2hctx_srtt = r; + ccid2_change_srtt(hctx, r); hctx->ccid2hctx_rttvar = r >> 1; } else { /* RTTVAR */ long tmp = hctx->ccid2hctx_srtt - r; + long srtt; + if (tmp < 0) tmp *= -1; @@ -441,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_rttvar += tmp; /* SRTT */ - hctx->ccid2hctx_srtt *= 7; - hctx->ccid2hctx_srtt >>= 3; + srtt = hctx->ccid2hctx_srtt; + srtt *= 7; + srtt >>= 3; tmp = r >> 3; - hctx->ccid2hctx_srtt += tmp; + srtt += tmp; + ccid2_change_srtt(hctx, srtt); } s = hctx->ccid2hctx_rttvar << 2; /* clock granularity is 1 when based on jiffies */ @@ -480,13 +538,29 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - hctx->ccid2hctx_pipe--; + ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); BUG_ON(hctx->ccid2hctx_pipe < 0); if (hctx->ccid2hctx_pipe == 0) ccid2_hc_tx_kill_rto_timer(sk); } +static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, + struct ccid2_seq *seqp) +{ + if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { + ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); + return; + } + + hctx->ccid2hctx_last_cong = jiffies; + + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; +} + static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -497,7 +571,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) unsigned char veclen; int offset = 0; int done = 0; - int loss = 0; unsigned int maxincr = 0; ccid2_hc_tx_check_sanity(hctx); @@ -583,15 +656,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = (*vector & - DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 state = *vector & + DCCP_ACKVEC_STATE_MASK; /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - loss = 1; + ccid2_congestion_event(hctx, + seqp); } else ccid2_new_ack(sk, seqp, &maxincr); @@ -643,7 +717,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* check for lost packets */ while (1) { if (!seqp->ccid2s_acked) { - loss = 1; + ccid2_pr_debug("Packet lost: %llu\n", + seqp->ccid2s_seq); + /* XXX need to traverse from tail -> head in + * order to detect multiple congestion events in + * one ack vector. + */ + ccid2_congestion_event(hctx, seqp); ccid2_hc_tx_dec_pipe(sk); } if (seqp == hctx->ccid2hctx_seqt) @@ -662,53 +742,33 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; } - if (loss) { - /* XXX do bit shifts guarantee a 0 as the new bit? */ - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1); - hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; - if (hctx->ccid2hctx_ssthresh < 2) - hctx->ccid2hctx_ssthresh = 2; - } - ccid2_hc_tx_check_sanity(hctx); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); - int seqcount = ccid2_seq_len; - int i; - /* XXX init variables with proper values */ - hctx->ccid2hctx_cwnd = 1; - hctx->ccid2hctx_ssthresh = 10; + ccid2_change_cwnd(hctx, 1); + /* Initialize ssthresh to infinity. This means that we will exit the + * initial slow-start after the first packet loss. This is what we + * want. + */ + hctx->ccid2hctx_ssthresh = ~0; hctx->ccid2hctx_numdupack = 3; + hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... */ - hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * - seqcount, gfp_any()); - if (hctx->ccid2hctx_seqbuf == NULL) + if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) return -ENOMEM; - for (i = 0; i < (seqcount - 1); i++) { - hctx->ccid2hctx_seqbuf[i].ccid2s_next = - &hctx->ccid2hctx_seqbuf[i + 1]; - hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev = - &hctx->ccid2hctx_seqbuf[i]; - } - hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next = - hctx->ccid2hctx_seqbuf; - hctx->ccid2hctx_seqbuf->ccid2s_prev = - &hctx->ccid2hctx_seqbuf[seqcount - 1]; - - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf; - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; - hctx->ccid2hctx_srtt = -1; + ccid2_change_srtt(hctx, -1); hctx->ccid2hctx_rttvar = -1; hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; + hctx->ccid2hctx_last_cong = jiffies; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; @@ -721,10 +781,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) static void ccid2_hc_tx_exit(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + int i; ccid2_hc_tx_kill_rto_timer(sk); - kfree(hctx->ccid2hctx_seqbuf); - hctx->ccid2hctx_seqbuf = NULL; + + for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) + kfree(hctx->ccid2hctx_seqbuf[i]); + hctx->ccid2hctx_seqbufc = 0; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -745,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } static struct ccid_operations ccid2 = { - .ccid_id = 2, + .ccid_id = DCCPC_CCID2, .ccid_name = "ccid2", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 451a87464fa5..5b2ef4acb300 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -35,6 +35,9 @@ struct ccid2_seq { struct ccid2_seq *ccid2s_next; }; +#define CCID2_SEQBUF_LEN 256 +#define CCID2_SEQBUF_MAX 128 + /** struct ccid2_hc_tx_sock - CCID2 TX half connection * * @ccid2hctx_ssacks - ACKs recv in slow start @@ -50,10 +53,11 @@ struct ccid2_hc_tx_sock { int ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; - int ccid2hctx_ssthresh; + unsigned int ccid2hctx_ssthresh; int ccid2hctx_pipe; int ccid2hctx_numdupack; - struct ccid2_seq *ccid2hctx_seqbuf; + struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; + int ccid2hctx_seqbufc; struct ccid2_seq *ccid2hctx_seqh; struct ccid2_seq *ccid2hctx_seqt; long ccid2hctx_rto; @@ -67,6 +71,7 @@ struct ccid2_hc_tx_sock { u64 ccid2hctx_rpseq; int ccid2hctx_rpdupack; int ccid2hctx_sendwait; + unsigned long ccid2hctx_last_cong; }; struct ccid2_hc_rx_sock { diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index b4a51d0355a5..67d2dc0e7c67 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,7 +2,7 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald <imcdnzl@gmail.com> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * * An implementation of the DCCP protocol * @@ -34,7 +34,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/config.h> #include "../ccid.h" #include "../dccp.h" #include "lib/packet_history.h" @@ -343,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } out: return rc; @@ -414,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: if (len > 0) { - hctx->ccid3hctx_t_nom = now; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); timeval_add_usecs(&hctx->ccid3hctx_t_nom, @@ -758,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } hcrx->ccid3hcrx_tstamp_last_feedback = now; - hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; - hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; + hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ @@ -783,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) return 0; @@ -855,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: + if (!tail) { + LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", + __FUNCTION__); + return ~0; + } rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -865,9 +871,20 @@ found: delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) + x_recv = hcrx->ccid3hcrx_x_recv; + tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); tmp2 = (u32)tmp1; + + if (!tmp2) { + LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " + "%s: x_recv = %u, rtt =%u\n", + __FUNCTION__, x_recv, rtt); + return ~0; + } + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = tfrc_calc_x_reverse_lookup(fval); @@ -883,31 +900,101 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct dccp_li_hist_entry *head; + u64 seq_temp; - if (seq_loss != DCCP_MAX_SEQNO + 1 && - list_empty(&hcrx->ccid3hcrx_li_hist)) { - struct dccp_li_hist_entry *li_tail; + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { + if (!dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) + return; - li_tail = dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, - seq_loss, win_loss); - if (li_tail == NULL) + head = list_entry(hcrx->ccid3hcrx_li_hist.next, + struct dccp_li_hist_entry, dccplih_node); + head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = list_entry(hcrx->ccid3hcrx_li_hist.next, + struct dccp_li_hist_entry, dccplih_node); + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + + if (entry == NULL) { + printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); + dump_stack(); return; - li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else - LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " - "interval\n", __FUNCTION__); + } + + list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); + + tail = hcrx->ccid3hcrx_li_hist.prev; + list_del(tail); + kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } } -static void ccid3_hc_rx_detect_loss(struct sock *sk) +static int ccid3_hc_rx_detect_loss(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u8 win_loss; - const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - &win_loss); + struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + u64 seqno = packet->dccphrx_seqno; + u64 tmp_seqno; + int loss = 0; + u8 ccval; + + + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + + if (!rx_hist || + follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + goto detect_out; + } + - ccid3_hc_rx_update_li(sk, seq_loss, win_loss); + while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) + > TFRC_RECV_NUM_LATE_LOSS) { + loss = 1; + ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + dccp_inc_seqno(&tmp_seqno); + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + dccp_inc_seqno(&tmp_seqno); + while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, + tmp_seqno, &ccval)) { + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_ccval_nonloss = ccval; + dccp_inc_seqno(&tmp_seqno); + } + } + + /* FIXME - this code could be simplified with above while */ + /* but works at moment */ + if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + } + +detect_out: + dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet, + hcrx->ccid3hcrx_seqno_nonloss); + return loss; } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -917,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev, r_sample, t_elapsed; - int ins; + u32 p_prev, rtt_prev, r_sample, t_elapsed; + int loss; BUG_ON(hcrx == NULL || !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || @@ -933,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; - p_prev = hcrx->ccid3hcrx_rtt; + rtt_prev = hcrx->ccid3hcrx_rtt; dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); @@ -952,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + r_sample / 10; - if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + if (rtt_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; @@ -974,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, packet); + loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; @@ -992,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; - if (ins != 0) + if (loss) break; dccp_timestamp(sk, &now); @@ -1013,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); - ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ @@ -1023,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; + } else { + printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); + dump_stack(); } if (hcrx->ccid3hcrx_p > p_prev) { @@ -1152,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, } static struct ccid_operations ccid3 = { - .ccid_id = 3, + .ccid_id = DCCPC_CCID3, .ccid_name = "ccid3", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), @@ -1231,7 +1319,7 @@ static __exit void ccid3_module_exit(void) } module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index f18b96d4e5a2..0a2cb7536d26 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -1,13 +1,13 @@ /* * net/dccp/ccids/ccid3.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -36,7 +36,6 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ -#include <linux/config.h> #include <linux/list.h> #include <linux/time.h> #include <linux/types.h> @@ -121,9 +120,10 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_last_counter:48, + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, ccid3hcrx_state:8, - ccid3hcrx_last_counter:4; + ccid3hcrx_ccval_last_counter:4; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 4c01a54143ad..906c81ab9d4f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/loss_interval.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify @@ -11,8 +11,8 @@ * (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> +#include <net/sock.h> #include "loss_interval.h" @@ -91,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) u32 w_tot = 0; list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + if (li_entry->dccplih_interval != ~0) { i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; w_tot += dccp_li_hist_w[i]; + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; } - if (i != 0) - i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) break; @@ -108,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); - /* FIXME: Why do we do this? -Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; + if (!w_tot) { + LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + return 1; + } - return i_tot * 4 / w_tot; + return i_tot / w_tot; } EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss) +int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss) { - struct dccp_li_hist_entry *tail = NULL, *entry; + struct dccp_li_hist_entry *entry; int i; - for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - return NULL; + dump_stack(); + return 0; } - if (tail == NULL) - tail = entry; + entry->dccplih_interval = ~0; list_add(&entry->dccplih_node, list); } entry->dccplih_seqno = seq_loss; entry->dccplih_win_count = win_loss; - return tail; + return 1; } EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 417d9d82df3e..0ae85f0340b2 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/loss_interval.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or modify it @@ -13,7 +13,6 @@ * any later version. */ -#include <linux/config.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> @@ -53,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern struct dccp_li_hist_entry * - dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss); +extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index d3f9d2053830..b876c9c81c65 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,13 +1,13 @@ /* - * net/dccp/packet_history.h + * net/dccp/packet_history.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -34,7 +34,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/string.h> @@ -113,64 +112,27 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); -int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet) + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno) { - struct dccp_rx_hist_entry *entry, *next, *iter; + struct dccp_rx_hist_entry *entry, *next; u8 num_later = 0; - iter = dccp_rx_hist_head(rx_list); - if (iter == NULL) - dccp_rx_hist_add_entry(rx_list, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(rx_list, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, rx_list, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; + list_add(&packet->dccphrx_node, rx_list); - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(hist, packet); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(rx_list, packet); - /* - * FIXME: else what? should we destroy the packet - * like above? - */ - } - } - -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(li_list)) { list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(hist, entry); + if (after48(nonloss_seqno, + entry->dccphrx_seqno)) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -218,94 +180,10 @@ trim_history: --num_later; } } - - return 0; } EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, - struct list_head *li_list, u8 *win_loss) -{ - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - - list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out; - - num_later = 1; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(li_list)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - __FUNCTION__); - goto out; - } else { - LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", - __FUNCTION__); - goto out; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. - */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } -out: - if (seq_loss != DCCP_MAX_SEQNO + 1) - *win_loss = a_loss->dccphrx_ccval; - else - *win_loss = 0; /* Paranoia */ - - return seq_loss; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); - struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); @@ -366,6 +244,25 @@ struct dccp_tx_hist_entry * EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) +{ + struct dccp_rx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } + + if (packet) + *ccval = packet->dccphrx_ccval; + + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); + void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *packet) @@ -392,7 +289,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); -MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); MODULE_DESCRIPTION("DCCP TFRC library"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 122e96737ff6..067cf1c85a37 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -37,7 +37,6 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ -#include <linux/config.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> @@ -107,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) @@ -165,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list); -static inline void dccp_rx_hist_add_entry(struct list_head *list, - struct dccp_rx_hist_entry *entry) -{ - list_add(&entry->dccphrx_node, list); -} - static inline struct dccp_rx_hist_entry * dccp_rx_hist_head(struct list_head *list) { @@ -189,10 +184,11 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet); + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 130c4c40cfe3..45f30f59ea2a 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/tfrc.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index add3cae65e2d..44076e0c6591 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/tfrc_equation.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * @@ -12,7 +12,6 @@ * (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/div64.h> diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 1fe509148689..0a21be437ed3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -5,14 +5,13 @@ * * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <net/snmp.h> #include <net/sock.h> @@ -82,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/* is seq1 next seqno after seq2 */ +static inline int follows48(const u64 seq1, const u64 seq2) +{ + int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); + + return diff==1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ @@ -123,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); +extern void dccp_write_xmit(struct sock *sk, int block); extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0f25dc395967..0f3745585a94 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/inet_diag.h> diff --git a/net/dccp/feat.c b/net/dccp/feat.c index b39e2a597889..a1b0682ee77c 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include "dccp.h" diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 6048373c7186..cee553d416ca 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -26,4 +26,11 @@ extern void dccp_feat_clean(struct dccp_minisock *dmsk); extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); extern int dccp_feat_init(struct dccp_minisock *dmsk); +extern int dccp_feat_default_sequence_window; +extern int dccp_feat_default_rx_ccid; +extern int dccp_feat_default_tx_ccid; +extern int dccp_feat_default_ack_ratio; +extern int dccp_feat_default_send_ack_vector; +extern int dccp_feat_default_send_ndp_count; + #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index bfc53665516b..7f9dc6ac58c9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f2c011fd2ba1..bf692c1c116f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/icmp.h> #include <linux/module.h> @@ -51,15 +50,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -502,11 +498,13 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; - req->rcv_wnd = 100; /* Fake, option parsing will get the - right value */ + req->rcv_wnd = dccp_feat_default_sequence_window; ireq->opt = NULL; /* @@ -607,10 +605,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); + nsk = inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -680,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -923,7 +922,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, ntohs(dh->dccph_dport), + skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 65e2ab0886e6..7a47399cf31f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> #include <linux/xfrm.h> @@ -32,6 +31,7 @@ #include "dccp.h" #include "ipv6.h" +#include "feat.h" /* Socket used for sending RSTs and ACKs */ static struct socket *dccp_v6_ctl_socket; @@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -230,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -704,12 +709,14 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); - req->rcv_wnd = 100; /* Fake, option parsing will get the - right value */ + req->rcv_wnd = dccp_feat_default_sequence_window; ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || @@ -843,6 +850,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; @@ -864,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; @@ -962,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index e4d4e9309270..6eef81fdbe56 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -11,7 +11,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/ipv6.h> diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c0349e5b0551..9045438d6b36 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> #include <linux/timer.h> diff --git a/net/dccp/options.c b/net/dccp/options.c index e9feb2a0c770..07a34696ac97 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -4,14 +4,13 @@ * An implementation of the DCCP protocol * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> @@ -30,6 +29,8 @@ int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; +EXPORT_SYMBOL_GPL(dccp_feat_default_sequence_window); + void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window; diff --git a/net/dccp/output.c b/net/dccp/output.c index 7409e4a3abdf..7102e3aed4ca 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/kernel.h> #include <linux/skbuff.h> @@ -199,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_err) goto do_error; if (!*timeo) goto do_nonblock; @@ -235,37 +234,72 @@ do_interrupted: goto out; } -int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) +static void dccp_write_xmit_timer(unsigned long data) { + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + else + dccp_write_xmit(sk, 0); + bh_unlock_sock(sk); + sock_put(sk); +} + +void dccp_write_xmit(struct sock *sk, int block) { - const struct dccp_sock *dp = dccp_sk(sk); - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + long timeo = 30000; /* If a packet is taking longer than 2 secs + we have other issues */ + + while ((skb = skb_peek(&sk->sk_write_queue))) { + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, skb->len); - if (err > 0) - err = dccp_wait_for_ccid(sk, skb, timeo); + if (err > 0) { + if (!block) { + sk_reset_timer(sk, &dp->dccps_xmit_timer, + msecs_to_jiffies(err)+jiffies); + break; + } else + err = dccp_wait_for_ccid(sk, skb, &timeo); + if (err) { + printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" + " %d\n", __FUNCTION__, err); + dump_stack(); + } + } - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; + skb_dequeue(&sk->sk_write_queue); + if (err == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } else - kfree_skb(skb); - - return err; + dcb->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + if (err) { + printk(KERN_CRIT "%s:err from " + "ccid_hc_tx_packet_sent %d\n", + __FUNCTION__, err); + dump_stack(); + } + } else + kfree(skb); + } } int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) @@ -427,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk) dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); icsk->icsk_retransmits = 0; + init_timer(&dp->dccps_xmit_timer); + dp->dccps_xmit_timer.data = (unsigned long)sk; + dp->dccps_xmit_timer.function = dccp_write_xmit_timer; } int dccp_connect(struct sock *sk) @@ -561,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; if (active) { + dccp_write_xmit(sk, 1); dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, prio)); + /* FIXME do we need a retransmit timer here? */ } else dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c new file mode 100644 index 000000000000..146496fce2e2 --- /dev/null +++ b/net/dccp/probe.c @@ -0,0 +1,198 @@ +/* + * dccp_probe - Observe the DCCP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for DCCP from Stephen Hemminger's code + * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/dccp.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/vmalloc.h> + +#include "dccp.h" +#include "ccid.h" +#include "ccids/ccid3.h" + +static int port; + +static int bufsize = 64 * 1024; + +static const char procname[] = "dccpprobe"; + +struct { + struct kfifo *fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timeval tstart; +} dccpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + struct timeval now; + char tbuf[256]; + + va_start(args, fmt); + do_gettimeofday(&now); + + now.tv_sec -= dccpw.tstart.tv_sec; + now.tv_usec -= dccpw.tstart.tv_usec; + if (now.tv_usec < 0) { + --now.tv_sec; + now.tv_usec += 1000000; + } + + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_usec); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + + kfifo_put(dccpw.fifo, tbuf, len); + wake_up(&dccpw.wait); +} + +static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct dccp_minisock *dmsk = dccp_msk(sk); + const struct inet_sock *inet = inet_sk(sk); + const struct ccid3_hc_tx_sock *hctx; + + if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + hctx = ccid3_hc_tx_sk(sk); + else + hctx = NULL; + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { + if (hctx) + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi); + else + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); + } + + jprobe_return(); + return 0; +} + +static struct jprobe dccp_send_probe = { + .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, }, + .entry = (kprobe_opcode_t *)&jdccp_sendmsg, +}; + +static int dccpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(dccpw.fifo); + do_gettimeofday(&dccpw.tstart); + return 0; +} + +static ssize_t dccpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf || len < 0) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(dccpw.wait, + __kfifo_len(dccpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_get(dccpw.fifo, tbuf, len); + error = copy_to_user(buf, tbuf, cnt); + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static struct file_operations dccpprobe_fops = { + .owner = THIS_MODULE, + .open = dccpprobe_open, + .read = dccpprobe_read, +}; + +static __init int dccpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&dccpw.wait); + spin_lock_init(&dccpw.lock); + dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + + if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + goto err0; + + ret = register_jprobe(&dccp_send_probe); + if (ret) + goto err1; + + pr_info("DCCP watch registered (port=%d)\n", port); + return 0; +err1: + proc_net_remove(procname); +err0: + kfifo_free(dccpw.fifo); + return ret; +} +module_init(dccpprobe_init); + +static __exit void dccpprobe_exit(void) +{ + kfifo_free(dccpw.fifo); + proc_net_remove(procname); + unregister_jprobe(&dccp_send_probe); + +} +module_exit(dccpprobe_exit); + +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>"); +MODULE_DESCRIPTION("DCCP snooper"); +MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 5317fd3e6691..72cbdcfc2c65 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> @@ -218,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; + dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; @@ -268,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -485,7 +478,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, err = -EINVAL; else err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat *) + (struct dccp_so_feat __user *) optval); break; @@ -494,7 +487,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, err = -EINVAL; else err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat *) + (struct dccp_so_feat __user *) optval); break; @@ -541,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len, int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; @@ -663,17 +653,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_write_xmit(sk, skb, &timeo); - /* - * XXX we don't use sk_write_queue, so just discard the packet. - * Current plan however is to _use_ sk_write_queue with - * an algorith similar to tcp_sendmsg, where the main difference - * is that in DCCP we have to respect packet boundaries, so - * no coalescing of skbs. - * - * This bug was _quickly_ found & fixed by just looking at an OSTRA - * generated callgraph 8) -acme - */ + skb_queue_tail(&sk->sk_write_queue, skb); + dccp_write_xmit(sk,0); out_release: release_sock(sk); return rc ? : len; @@ -847,6 +828,7 @@ static int dccp_close_state(struct sock *sk) void dccp_close(struct sock *sk, long timeout) { + struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; int state; @@ -863,6 +845,8 @@ void dccp_close(struct sock *sk, long timeout) goto adjudge_to_death; } + sk_stop_timer(sk, &dp->dccps_xmit_timer); + /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 64c89e9c229e..38bc157876f3 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -9,21 +9,14 @@ * as published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> +#include "feat.h" #ifndef CONFIG_SYSCTL #error This file should not be compiled without CONFIG_SYSCTL defined #endif -extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; - static struct ctl_table dccp_default_table[] = { { .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 5244415e5f18..8447742f5615 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 92f2ec46fd22..36e72cb145b0 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -27,6 +27,7 @@ config DECNET config DECNET_ROUTER bool "DECnet: router support (EXPERIMENTAL)" depends on DECNET && EXPERIMENTAL + select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. If you diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2b289ef20ab3..70e027375682 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -99,7 +99,6 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat dn_bind fixes *******************************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> @@ -131,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <linux/poll.h> #include <net/neighbour.h> #include <net/dst.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_nsp.h> #include <net/dn_dev.h> diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a26ff9f44576..01861feb608d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -24,7 +24,6 @@ * devices. All mtu based now. */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -35,6 +34,7 @@ #include <linux/seq_file.h> #include <linux/timer.h> #include <linux/string.h> +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/skbuff.h> @@ -46,6 +46,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_dev.h> #include <net/dn_route.h> @@ -414,11 +415,7 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void) { struct dn_ifaddr *ifa; - ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); - - if (ifa) { - memset(ifa, 0, sizeof(*ifa)); - } + ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); return ifa; } @@ -749,20 +746,23 @@ rtattr_failure: static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + int payload = sizeof(struct ifaddrmsg) + 128; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); - return; - } - if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { + skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); + + err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1106,10 +1106,9 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err) return NULL; *err = -ENOBUFS; - if ((dn_db = kmalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL) + if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL) return NULL; - memset(dn_db, 0, sizeof(struct dn_dev)); memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); smp_wmb(); dev->dn_ptr = dn_db; @@ -1423,8 +1422,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, - [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index bd4ce8681a12..1cf010124ec5 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -17,7 +17,6 @@ * this code was copied from it. * */ -#include <linux/config.h> #include <linux/string.h> #include <linux/net.h> #include <linux/socket.h> @@ -35,6 +34,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_route.h> #include <net/dn_fib.h> @@ -55,11 +55,9 @@ #define endfor_nexthops(fi) } -extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); - static DEFINE_SPINLOCK(dn_fib_multipath_lock); static struct dn_fib_info *dn_fib_info_list; -static DEFINE_RWLOCK(dn_fib_info_lock); +static DEFINE_SPINLOCK(dn_fib_info_lock); static struct { @@ -80,6 +78,9 @@ static struct [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, }; +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); +static int dn_fib_sync_up(struct net_device *dev); + void dn_fib_free_info(struct dn_fib_info *fi) { if (fi->fib_dead == 0) { @@ -97,7 +98,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) void dn_fib_release_info(struct dn_fib_info *fi) { - write_lock(&dn_fib_info_lock); + spin_lock(&dn_fib_info_lock); if (fi && --fi->fib_treeref == 0) { if (fi->fib_next) fi->fib_next->fib_prev = fi->fib_prev; @@ -108,7 +109,7 @@ void dn_fib_release_info(struct dn_fib_info *fi) fi->fib_dead = 1; dn_fib_info_put(fi); } - write_unlock(&dn_fib_info_lock); + spin_unlock(&dn_fib_info_lock); } static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) @@ -284,11 +285,10 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta goto err_inval; } - fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); + fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; if (fi == NULL) goto failure; - memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct dn_fib_nh)); fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; @@ -380,13 +380,13 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock(&dn_fib_info_lock); + spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; fi->fib_prev = NULL; if (dn_fib_info_list) dn_fib_info_list->fib_prev = fi; dn_fib_info_list = fi; - write_unlock(&dn_fib_info_lock); + spin_unlock(&dn_fib_info_lock); return fi; err_inval: @@ -492,7 +492,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) rta[i-1] = (struct rtattr *)RTA_DATA(attr); } } @@ -509,7 +510,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 0); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); if (tb) return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); @@ -525,46 +526,13 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 1); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); if (tb) return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); return -ENOBUFS; } - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - int t; - int s_t; - struct dn_fib_table *tb; - - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_MIN_TABLE; - - for(t = s_t; t <= RT_TABLE_MAX; t++) { - if (t < s_t) - continue; - if (t > s_t) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - tb = dn_fib_get_table(t, 0); - if (tb == NULL) - continue; - if (tb->dump(tb, skb, cb) < 0) - break; - } - - cb->args[0] = t; - - return skb->len; -} - static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) { struct dn_fib_table *tb; @@ -684,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -728,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) } -int dn_fib_sync_up(struct net_device *dev) +static int dn_fib_sync_up(struct net_device *dev) { int ret = 0; @@ -762,22 +730,6 @@ int dn_fib_sync_up(struct net_device *dev) return ret; } -void dn_fib_flush(void) -{ - int flushed = 0; - struct dn_fib_table *tb; - int id; - - for(id = RT_TABLE_MAX; id > 0; id--) { - if ((tb = dn_fib_get_table(id, 0)) == NULL) - continue; - flushed += tb->flush(tb); - } - - if (flushed) - dn_rt_cache_flush(-1); -} - static struct notifier_block dn_fib_dnaddr_notifier = { .notifier_call = dn_fib_dnaddr_event, }; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 66e230c3b328..ff0ebe99137d 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -24,7 +24,6 @@ * */ -#include <linux/config.h> #include <linux/net.h> #include <linux/module.h> #include <linux/socket.h> @@ -581,12 +580,11 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; - memset(s, 0, sizeof(*s)); rc = seq_open(file, &dn_neigh_seq_ops); if (rc) goto out_kfree; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index a2ba9db1c376..72ecc6e62ec4 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -45,7 +45,6 @@ GNU General Public License for more details. *******************************************************************************/ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -587,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb, 0); + err = sk_filter(sk, skb); if (err) goto out; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5abf7057af00..dd0761e3d280 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -55,7 +55,6 @@ GNU General Public License for more details. *******************************************************************************/ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -81,6 +80,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_dev.h> #include <net/dn_nsp.h> @@ -926,8 +926,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; - if (dn_dev_islocal(dev_out, oldflp->fld_src)) - break; + if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + continue; + if ((dev_out->flags & IFF_LOOPBACK) && + oldflp->fld_dst && + !dn_dev_islocal(dev_out, oldflp->fld_dst)) + continue; + break; } read_unlock(&dev_base_lock); if (dev_out == NULL) @@ -1280,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); + src_map = fl.fld_src; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { @@ -1481,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 0; r->rtm_tos = 0; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; r->rtm_scope = RT_SCOPE_UNIVERSE; @@ -1605,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - - return err; + return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); @@ -1777,14 +1781,9 @@ void __init dn_route_init(void) { int i, goal, order; - dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", - sizeof(struct dn_route), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!dn_dst_ops.kmem_cachep) - panic("DECnet: Failed to allocate dn_dst_cache\n"); - + dn_dst_ops.kmem_cachep = + kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 446faafe2065..3e0c882c90bf 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -11,261 +11,213 @@ * * * Changes: + * Steve Whitehouse <steve@chygwyn.com> + * Updated for Thomas Graf's generic rules * */ -#include <linux/config.h> -#include <linux/string.h> #include <linux/net.h> -#include <linux/socket.h> -#include <linux/sockios.h> #include <linux/init.h> -#include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> #include <linux/netdevice.h> -#include <linux/timer.h> #include <linux/spinlock.h> -#include <linux/in_route.h> #include <linux/list.h> #include <linux/rcupdate.h> -#include <asm/atomic.h> -#include <asm/uaccess.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_fib.h> #include <net/dn_neigh.h> #include <net/dn_dev.h> +static struct fib_rules_ops dn_fib_rules_ops; + struct dn_fib_rule { - struct hlist_node r_hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - __le16 r_src; - __le16 r_srcmask; - __le16 r_dst; - __le16 r_dstmask; - __le16 r_srcmap; - u8 r_flags; + struct fib_rule common; + unsigned char dst_len; + unsigned char src_len; + __le16 src; + __le16 srcmask; + __le16 dst; + __le16 dstmask; + __le16 srcmap; + u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; + u32 fwmask; #endif - int r_ifindex; - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; static struct dn_fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7fff, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7fff, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct hlist_head dn_fib_rules; +static LIST_HEAD(dn_fib_rules); + -int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - - err = -EPERM; - if (r == &default_rule) - break; - - hlist_del_rcu(&r->r_hlist); - r->r_dead = 1; - dn_fib_rule_put(r); - err = 0; - break; - } - } + struct fib_lookup_arg arg = { + .result = res, + }; + int err; + + err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + res->r = arg.rule; return err; } -static inline void dn_fib_rule_put_rcu(struct rcu_head *head) +static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { - struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); - kfree(r); -} + int err = -EAGAIN; + struct dn_fib_table *tbl; -void dn_fib_rule_put(struct dn_fib_rule *r) -{ - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, dn_fib_rule_put_rcu); - else - printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); + switch(rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + tbl = dn_fib_get_table(rule->table, 0); + if (tbl == NULL) + goto errout; + + err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; } +static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U16 }, + [FRA_DST] = { .type = NLA_U16 }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, +}; -int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - if (rtm->rtm_type == RTN_NAT) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct dn_fib_table *tb; - if (rtm->rtm_type == RTN_UNICAST) { - if ((tb = dn_fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = tb->n; - } - } + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + u16 daddr = fl->fld_dst; + u16 saddr = fl->fld_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - memset(new_r, 0, sizeof(*new_r)); - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); + if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask) + return 0; #endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = dev_get_by_name(new_r->r_ifname); - if (dev) { - new_r->r_ifindex = dev->ifindex; - dev_put(dev); - } - } - r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); - if (!new_r->r_preference) { - if (r && r->r_hlist.next != NULL) { - r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; + return 1; +} + +static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct dn_fib_table *table; + + table = dn_fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + + rule->table = table->n; } } - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; + if (tb[FRA_SRC]) + r->src = nla_get_u16(tb[FRA_SRC]); + + if (tb[FRA_DST]) + r->dst = nla_get_u16(tb[FRA_DST]); + +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (tb[FRA_FWMARK]) { + r->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (r->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + r->fwmask = 0xFFFFFFFF; } - atomic_inc(&new_r->r_clntref); - if (last) - hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); - else - hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); - return 0; -} + if (tb[FRA_FWMASK]) + r->fwmask = nla_get_u32(tb[FRA_FWMASK]); +#endif + r->src_len = frh->src_len; + r->srcmask = dnet_make_mask(r->src_len); + r->dst_len = frh->dst_len; + r->dstmask = dnet_make_mask(r->dst_len); + err = 0; +errout: + return err; +} -int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct dn_fib_rule *r, *policy; - struct dn_fib_table *tb; - __le16 saddr = flp->fld_src; - __le16 daddr = flp->fld_dst; - struct hlist_node *node; - int err; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len && (r->src_len != frh->src_len)) + return 0; - rcu_read_lock(); + if (frh->dst_len && (r->dst_len != frh->dst_len)) + return 0; - hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || #ifdef CONFIG_DECNET_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) || + if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; + + if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - - switch(r->r_action) { - case RTN_UNICAST: - case RTN_NAT: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) - continue; - err = tb->lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } + if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) + return 0; + + if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST]))) + return 0; - rcu_read_unlock(); - return -ESRCH; + return 1; } unsigned dnet_addr_type(__le16 addr) @@ -273,7 +225,7 @@ unsigned dnet_addr_type(__le16 addr) struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL]; + struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); res.r = NULL; @@ -286,141 +238,79 @@ unsigned dnet_addr_type(__le16 addr) return ret; } -__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) +static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) { - struct dn_fib_rule *r = res->r; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (r->r_action == RTN_NAT) { - int addrtype = dnet_addr_type(r->r_srcmap); + frh->family = AF_DECnet; + frh->dst_len = r->dst_len; + frh->src_len = r->src_len; + frh->tos = 0; - if (addrtype == RTN_NAT) { - saddr = (saddr&~r->r_srcmask)|r->r_srcmap; - *flags |= RTCF_SNAT; - } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { - saddr = r->r_srcmap; - *flags |= RTCF_MASQ; - } - } - return saddr; -} - -static void dn_fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; - } -} +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (r->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); + if (r->fwmask || r->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask); +#endif + if (r->dst_len) + NLA_PUT_U16(skb, FRA_DST, r->dst); + if (r->src_len) + NLA_PUT_U16(skb, FRA_SRC, r->src); -static void dn_fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; + return 0; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } +nla_put_failure: + return -ENOBUFS; } -static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +static u32 dn_fib_rule_default_pref(void) { - struct net_device *dev = ptr; - - switch(event) { - case NETDEV_UNREGISTER: - dn_fib_rules_detach(dev); - dn_fib_sync_down(0, dev, 1); - case NETDEV_REGISTER: - dn_fib_rules_attach(dev); - dn_fib_sync_up(dev); + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&dn_fib_rules)) { + pos = dn_fib_rules.next; + if (pos->next != &dn_fib_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } - return NOTIFY_DONE; -} - - -static struct notifier_block dn_fib_rules_notifier = { - .notifier_call = dn_fib_rules_event, -}; - -static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, - struct netlink_callback *cb, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); -#endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 2, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 2, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return 0; } int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - int s_idx = cb->args[0]; - struct dn_fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (idx < s_idx) - continue; - if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) - break; - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - - return skb->len; + return fib_rules_dump(skb, cb, AF_DECnet); } +static struct fib_rules_ops dn_fib_rules_ops = { + .family = AF_DECnet, + .rule_size = sizeof(struct dn_fib_rule), + .action = dn_fib_rule_action, + .match = dn_fib_rule_match, + .configure = dn_fib_rule_configure, + .compare = dn_fib_rule_compare, + .fill = dn_fib_rule_fill, + .default_pref = dn_fib_rule_default_pref, + .nlgroup = RTNLGRP_DECnet_RULE, + .policy = dn_fib_rule_policy, + .rules_list = &dn_fib_rules, + .owner = THIS_MODULE, +}; + void __init dn_fib_rules_init(void) { - INIT_HLIST_HEAD(&dn_fib_rules); - hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); - register_netdevice_notifier(&dn_fib_rules_notifier); + list_add_tail(&default_rule.common.list, &dn_fib_rules); + fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - unregister_netdevice_notifier(&dn_fib_rules_notifier); + fib_rules_unregister(&dn_fib_rules_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 0ebc46af1bdd..317904bb5896 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -12,7 +12,6 @@ * Changes: * */ -#include <linux/config.h> #include <linux/string.h> #include <linux/net.h> #include <linux/socket.h> @@ -31,6 +30,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> +#include <net/fib_rules.h> #include <net/dn.h> #include <net/dn_route.h> #include <net/dn_fib.h> @@ -75,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) #define RT_TABLE_MIN 1 - +#define DN_FIB_TABLE_HASHSZ 256 +static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; @@ -159,12 +159,10 @@ static void dn_rehash_zone(struct dn_zone *dz) break; } - ht = kmalloc(new_divisor*sizeof(struct dn_fib_node*), GFP_KERNEL); - + ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL); if (ht == NULL) return; - memset(ht, 0, new_divisor*sizeof(struct dn_fib_node *)); write_lock_bh(&dn_fib_tables_lock); old_ht = dz->dz_hash; dz->dz_hash = ht; @@ -185,11 +183,10 @@ static void dn_free_node(struct dn_fib_node *f) static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) { int i; - struct dn_zone *dz = kmalloc(sizeof(struct dn_zone), GFP_KERNEL); + struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL); if (!dz) return NULL; - memset(dz, 0, sizeof(struct dn_zone)); if (z) { dz->dz_divisor = 16; dz->dz_hashmask = 0x0F; @@ -198,14 +195,12 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) dz->dz_hashmask = 0; } - dz->dz_hash = kmalloc(dz->dz_divisor*sizeof(struct dn_fib_node *), GFP_KERNEL); - + dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL); if (!dz->dz_hash) { kfree(dz); return NULL; } - memset(dz->dz_hash, 0, dz->dz_divisor*sizeof(struct dn_fib_node*)); dz->dz_order = z; dz->dz_mask = dnet_make_mask(z); @@ -269,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern } static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; @@ -283,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = 0; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; rtm->rtm_type = type; @@ -332,29 +328,29 @@ rtattr_failure: } -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, +static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; u32 pid = req ? req->pid : 0; - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256); + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, - f->fn_type, f->fn_scope, &f->fn_key, z, - DN_FIB_INFO(f), 0) < 0) { + err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, + DN_FIB_INFO(f), 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; - if (nlh->nlmsg_flags & NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); - if (nlh->nlmsg_flags & NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + + err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, @@ -365,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, { int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; for(i = 0; f; i++, f = f->fn_next) { if (i < s_i) continue; @@ -378,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope, &f->fn_key, dz->dz_order, f->fn_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -393,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for(h = 0; h < dz->dz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) continue; if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -417,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, struct dn_zone *dz; struct dn_hash *table = (struct dn_hash *)tb->data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&dn_fib_tables_lock); for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&dn_fib_tables_lock); return -1; } } read_unlock(&dn_fib_tables_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } +int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct dn_fib_table *tb; + struct hlist_node *node; + int dumped = 0; + + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return dn_cache_dump(skb, cb); + + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { + e = 0; + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + return skb->len; +} + static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; @@ -745,9 +778,11 @@ out: } -struct dn_fib_table *dn_fib_get_table(int n, int create) +struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; + struct hlist_node *node; + unsigned int h; if (n < RT_TABLE_MIN) return NULL; @@ -755,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) if (n > RT_TABLE_MAX) return NULL; - if (dn_fib_tables[n]) - return dn_fib_tables[n]; + h = n & (DN_FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + if (t->n == n) { + rcu_read_unlock(); + return t; + } + } + rcu_read_unlock(); if (!create) return NULL; @@ -777,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) t->flush = dn_fib_table_flush; t->dump = dn_fib_table_dump; memset(t->data, 0, sizeof(struct dn_hash)); - dn_fib_tables[n] = t; + hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); return t; } -static void dn_fib_del_tree(int n) -{ - struct dn_fib_table *t; - - write_lock(&dn_fib_tables_lock); - t = dn_fib_tables[n]; - dn_fib_tables[n] = NULL; - write_unlock(&dn_fib_tables_lock); - - kfree(t); -} - struct dn_fib_table *dn_fib_empty_table(void) { - int id; + u32 id; for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_tables[id] == NULL) + if (dn_fib_get_table(id, 0) == NULL) return dn_fib_get_table(id, 1); return NULL; } +void dn_fib_flush(void) +{ + int flushed = 0; + struct dn_fib_table *tb; + struct hlist_node *node; + unsigned int h; + + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + flushed += tb->flush(tb); + } + + if (flushed) + dn_rt_cache_flush(-1); +} + void __init dn_fib_table_init(void) { dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", @@ -814,10 +860,17 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { - int i; - - for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i) - dn_fib_del_tree(i); + struct dn_fib_table *t; + struct hlist_node *node, *next; + unsigned int h; - return; + write_lock(&dn_fib_tables_lock); + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist) { + hlist_del(&t->hlist); + kfree(t); + } + } + write_unlock(&dn_fib_tables_lock); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 74133ecd7700..8b99bd33540d 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index bda5920215fd..e246f054f368 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -13,7 +13,6 @@ * Steve Whitehouse - Memory buffer settings, like the tcp ones * */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/fs.h> diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 868265619dbb..4d66aac13483 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -674,12 +673,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) edev = dev->ec_ptr; if (edev == NULL) { /* Magic up a new one. */ - edev = kmalloc(sizeof(struct ec_device), GFP_KERNEL); + edev = kzalloc(sizeof(struct ec_device), GFP_KERNEL); if (edev == NULL) { err = -ENOMEM; break; } - memset(edev, 0, sizeof(struct ec_device)); dev->ec_ptr = edev; } else net2dev_map[edev->net] = NULL; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c971f14712ec..4bd78c8cfb26 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -51,7 +51,6 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/if_ether.h> #include <net/dst.h> @@ -65,81 +64,79 @@ __setup("ether=", netdev_boot_setup); -/* - * Create the Ethernet MAC header for an arbitrary protocol layer +/** + * eth_header - create the Ethernet header + * @skb: buffer to alter + * @dev: source device + * @type: Ethernet type field + * @daddr: destination address (NULL leave destination address) + * @saddr: source address (NULL use device source address) + * @len: packet length (<= skb->len) * - * saddr=NULL means use device source address - * daddr=NULL means leave destination address (eg unresolved arp) + * + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. */ - int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + void *daddr, void *saddr, unsigned len) { - struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); - /* - * Set the protocol type. For a packet of type ETH_P_802_3 we put the length - * in here instead. It is up to the 802.2 layer to carry protocol information. - */ - - if(type!=ETH_P_802_3) + if (type != ETH_P_802_3) eth->h_proto = htons(type); else eth->h_proto = htons(len); /* - * Set the source hardware address. + * Set the source hardware address. */ - - if(!saddr) + + if (!saddr) saddr = dev->dev_addr; - memcpy(eth->h_source,saddr,dev->addr_len); + memcpy(eth->h_source, saddr, dev->addr_len); - if(daddr) - { - memcpy(eth->h_dest,daddr,dev->addr_len); + if (daddr) { + memcpy(eth->h_dest, daddr, dev->addr_len); return ETH_HLEN; } - + /* - * Anyway, the loopback-device should never use this function... + * Anyway, the loopback-device should never use this function... */ - if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) - { + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { memset(eth->h_dest, 0, dev->addr_len); return ETH_HLEN; } - + return -ETH_HLEN; } - -/* - * Rebuild the Ethernet MAC header. This is called after an ARP - * (or in future other address resolution) has completed on this - * sk_buff. We now let ARP fill in the other fields. +/** + * eth_rebuild_header- rebuild the Ethernet MAC header. + * @skb: socket buffer to update * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. + * This is called after an ARP or IPV6 ndisc it's resolution on this + * sk_buff. We now let protocol (ARP) fill in the other fields. + * + * This routine CANNOT use cached dst->neigh! + * Really, it is used only when dst->neigh is wrong. */ - int eth_rebuild_header(struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)skb->data; struct net_device *dev = skb->dev; - switch (eth->h_proto) - { + switch (eth->h_proto) { #ifdef CONFIG_INET case __constant_htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif + return arp_find(eth->h_dest, skb); +#endif default: printk(KERN_DEBUG - "%s: unable to resolve type %X addresses.\n", + "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); break; } @@ -147,62 +144,70 @@ int eth_rebuild_header(struct sk_buff *skb) return 0; } - -/* - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. +/** + * eth_type_trans - determine the packet's protocol ID. + * @skb: received socket data + * @dev: receiving network device + * + * The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. */ - __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; - + skb->mac.raw = skb->data; - skb_pull(skb,ETH_HLEN); + skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); - + if (is_multicast_ether_addr(eth->h_dest)) { if (!compare_ether_addr(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; } - + /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. */ - - else if(1 /*dev->flags&IFF_PROMISC*/) { + + else if (1 /*dev->flags&IFF_PROMISC */ ) { if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } - + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - + rawp = skb->data; - + /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. */ if (*(unsigned short *)rawp == 0xFFFF) return htons(ETH_P_802_3); - + /* - * Real 802.2 LLC + * Real 802.2 LLC */ return htons(ETH_P_802_2); } +EXPORT_SYMBOL(eth_type_trans); +/** + * eth_header_parse - extract hardware address from packet + * @skb: packet to extract header from + * @haddr: destination buffer + */ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) { struct ethhdr *eth = eth_hdr(skb); @@ -210,14 +215,20 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) return ETH_ALEN; } +/** + * eth_header_cache - fill cache entry from neighbour + * @neigh: source neighbour + * @hh: destination cache entry + * Create an Ethernet header template from the neighbour. + */ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) { - unsigned short type = hh->hh_type; + __be16 type = hh->hh_type; struct ethhdr *eth; struct net_device *dev = neigh->dev; - eth = (struct ethhdr*) - (((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); + eth = (struct ethhdr *) + (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == __constant_htons(ETH_P_802_3)) return -1; @@ -229,27 +240,47 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) return 0; } -/* +/** + * eth_header_cache_update - update cache entry + * @hh: destination cache entry + * @dev: network device + * @haddr: new hardware address + * * Called by Address Resolution module to notify changes in address. */ - -void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) +void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, + unsigned char *haddr) { - memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), + memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), haddr, dev->addr_len); } -EXPORT_SYMBOL(eth_type_trans); - +/** + * eth_mac_addr - set new Ethernet hardware address + * @dev: network device + * @p: socket address + * Change hardware address of device. + * + * This doesn't change hardware matching, so needs to be overridden + * for most real devices. + */ static int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr=p; + struct sockaddr *addr = p; if (netif_running(dev)) return -EBUSY; - memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } +/** + * eth_change_mtu - set new MTU size + * @dev: network device + * @new_mtu: new Maximum Transfer Unit + * + * Allow changing MTU size. Needs to be overridden for devices + * supporting jumbo frames. + */ static int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) @@ -258,8 +289,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) return 0; } -/* - * Fill in the fields of the device structure with ethernet-generic values. +/** + * ether_setup - setup Ethernet network device + * @dev: network device + * Fill in the fields of the device structure with Ethernet-generic values. */ void ether_setup(struct net_device *dev) { @@ -278,21 +311,21 @@ void ether_setup(struct net_device *dev) dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - memset(dev->broadcast,0xFF, ETH_ALEN); + memset(dev->broadcast, 0xFF, ETH_ALEN); } EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an ethernet device + * alloc_etherdev - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated - * for this ethernet device + * for this Ethernet device * - * Fill in the fields of the device structure with ethernet-generic + * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. * * Constructs a new net device, complete with a private data area of - * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for * this private data area. */ diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index dbb08528ddf5..f7e84e9d13ad 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -58,6 +58,7 @@ config IEEE80211_CRYPT_TKIP depends on IEEE80211 && NET_RADIO select CRYPTO select CRYPTO_MICHAEL_MIC + select CRC32 ---help--- Include software based cipher suites in support of IEEE 802.11i (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c index cb71d794a7d1..5ed0a98b2d76 100644 --- a/net/ieee80211/ieee80211_crypt.c +++ b/net/ieee80211/ieee80211_crypt.c @@ -110,11 +110,10 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) unsigned long flags; struct ieee80211_crypto_alg *alg; - alg = kmalloc(sizeof(*alg), GFP_KERNEL); + alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) return -ENOMEM; - memset(alg, 0, sizeof(*alg)); alg->ops = ops; spin_lock_irqsave(&ieee80211_crypto_lock, flags); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 78b2d13e80e3..35aa3426c3fa 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,7 +9,7 @@ * more details. */ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -49,7 +49,7 @@ struct ieee80211_ccmp_data { int key_idx; - struct crypto_tfm *tfm; + struct crypto_cipher *tfm; /* scratch buffers for virt_to_page() (crypto API) */ u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], @@ -57,36 +57,26 @@ struct ieee80211_ccmp_data { u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; }; -static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm, - const u8 pt[16], u8 ct[16]) +static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, + const u8 pt[16], u8 ct[16]) { - struct scatterlist src, dst; - - src.page = virt_to_page(pt); - src.offset = offset_in_page(pt); - src.length = AES_BLOCK_LEN; - - dst.page = virt_to_page(ct); - dst.offset = offset_in_page(ct); - dst.length = AES_BLOCK_LEN; - - crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN); + crypto_cipher_encrypt_one(tfm, ct, pt); } static void *ieee80211_ccmp_init(int key_idx) { struct ieee80211_ccmp_data *priv; - priv = kmalloc(sizeof(*priv), GFP_ATOMIC); + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) goto fail; - memset(priv, 0, sizeof(*priv)); priv->key_idx = key_idx; - priv->tfm = crypto_alloc_tfm("aes", 0); - if (priv->tfm == NULL) { + priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm)) { printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate " "crypto API aes\n"); + priv->tfm = NULL; goto fail; } @@ -95,7 +85,7 @@ static void *ieee80211_ccmp_init(int key_idx) fail: if (priv) { if (priv->tfm) - crypto_free_tfm(priv->tfm); + crypto_free_cipher(priv->tfm); kfree(priv); } @@ -106,7 +96,7 @@ static void ieee80211_ccmp_deinit(void *priv) { struct ieee80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) - crypto_free_tfm(_priv->tfm); + crypto_free_cipher(_priv->tfm); kfree(priv); } @@ -117,7 +107,7 @@ static inline void xor_block(u8 * b, u8 * a, size_t len) b[i] ^= a[i]; } -static void ccmp_init_blocks(struct crypto_tfm *tfm, +static void ccmp_init_blocks(struct crypto_cipher *tfm, struct ieee80211_hdr_4addr *hdr, u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { @@ -273,6 +263,27 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) return 0; } +/* + * deal with seq counter wrapping correctly. + * refer to timer_after() for jiffies wrapping handling + */ +static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) +{ + u32 iv32_n, iv16_n; + u32 iv32_o, iv16_o; + + iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; + iv16_n = (pn_n[4] << 8) | pn_n[5]; + + iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; + iv16_o = (pn_o[4] << 8) | pn_o[5]; + + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; + return 0; +} + static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_ccmp_data *key = priv; @@ -325,7 +336,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pn[5] = pos[0]; pos += 8; - if (memcmp(pn, key->rx_pn, CCMP_PN_LEN) <= 0) { + if (ccmp_replay_check(pn, key->rx_pn)) { if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT " previous PN %02x%02x%02x%02x%02x%02x " @@ -379,7 +390,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_ccmp_data *data = priv; int keyidx; - struct crypto_tfm *tfm = data->tfm; + struct crypto_cipher *tfm = data->tfm; keyidx = data->key_idx; memset(data, 0, sizeof(*data)); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 3fa5df2e1f0b..4200ec509866 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -9,7 +9,7 @@ * more details. */ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -53,8 +53,10 @@ struct ieee80211_tkip_data { int key_idx; - struct crypto_tfm *tfm_arc4; - struct crypto_tfm *tfm_michael; + struct crypto_blkcipher *rx_tfm_arc4; + struct crypto_hash *rx_tfm_michael; + struct crypto_blkcipher *tx_tfm_arc4; + struct crypto_hash *tx_tfm_michael; /* scratch buffers for virt_to_page() (crypto API) */ u8 rx_hdr[16], tx_hdr[16]; @@ -86,17 +88,39 @@ static void *ieee80211_tkip_init(int key_idx) priv->key_idx = key_idx; - priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0); - if (priv->tfm_arc4 == NULL) { + priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm_arc4)) { printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " "crypto API arc4\n"); + priv->tx_tfm_arc4 = NULL; goto fail; } - priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0); - if (priv->tfm_michael == NULL) { + priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm_michael)) { printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); + priv->tx_tfm_michael = NULL; + goto fail; + } + + priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm_arc4)) { + printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + "crypto API arc4\n"); + priv->rx_tfm_arc4 = NULL; + goto fail; + } + + priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm_michael)) { + printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " + "crypto API michael_mic\n"); + priv->rx_tfm_michael = NULL; goto fail; } @@ -104,10 +128,14 @@ static void *ieee80211_tkip_init(int key_idx) fail: if (priv) { - if (priv->tfm_michael) - crypto_free_tfm(priv->tfm_michael); - if (priv->tfm_arc4) - crypto_free_tfm(priv->tfm_arc4); + if (priv->tx_tfm_michael) + crypto_free_hash(priv->tx_tfm_michael); + if (priv->tx_tfm_arc4) + crypto_free_blkcipher(priv->tx_tfm_arc4); + if (priv->rx_tfm_michael) + crypto_free_hash(priv->rx_tfm_michael); + if (priv->rx_tfm_arc4) + crypto_free_blkcipher(priv->rx_tfm_arc4); kfree(priv); } @@ -117,10 +145,16 @@ static void *ieee80211_tkip_init(int key_idx) static void ieee80211_tkip_deinit(void *priv) { struct ieee80211_tkip_data *_priv = priv; - if (_priv && _priv->tfm_michael) - crypto_free_tfm(_priv->tfm_michael); - if (_priv && _priv->tfm_arc4) - crypto_free_tfm(_priv->tfm_arc4); + if (_priv) { + if (_priv->tx_tfm_michael) + crypto_free_hash(_priv->tx_tfm_michael); + if (_priv->tx_tfm_arc4) + crypto_free_blkcipher(_priv->tx_tfm_arc4); + if (_priv->rx_tfm_michael) + crypto_free_hash(_priv->rx_tfm_michael); + if (_priv->rx_tfm_arc4) + crypto_free_blkcipher(_priv->rx_tfm_arc4); + } kfree(priv); } @@ -319,6 +353,7 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; int len; u8 rc4key[16], *pos, *icv; u32 crc; @@ -352,18 +387,30 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[2] = crc >> 16; icv[3] = crc >> 24; - crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); + crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = len + 4; - crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4); + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); +} +/* + * deal with seq counter wrapping correctly. + * refer to timer_after() for jiffies wrapping handling + */ +static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, + u32 iv32_o, u16 iv16_o) +{ + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; return 0; } static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; u8 rc4key[16]; u8 keyidx, *pos; u32 iv32; @@ -415,8 +462,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); pos += 8; - if (iv32 < tkey->rx_iv32 || - (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) { + if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT " previous TSC %08x%04x received TSC " @@ -435,11 +481,18 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 12; - crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); + crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = plen + 4; - crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { + if (net_ratelimit()) { + printk(KERN_DEBUG ": TKIP: failed to decrypt " + "received packet from " MAC_FMT "\n", + MAC_ARG(hdr->addr2)); + } + return -7; + } crc = ~crc32_le(~0, pos, plen); icv[0] = crc; @@ -473,12 +526,13 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } -static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, +static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, u8 * data, size_t data_len, u8 * mic) { + struct hash_desc desc; struct scatterlist sg[2]; - if (tkey->tfm_michael == NULL) { + if (tfm_michael == NULL) { printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); return -1; } @@ -490,12 +544,12 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, sg[1].offset = offset_in_page(data); sg[1].length = data_len; - crypto_digest_init(tkey->tfm_michael); - crypto_digest_setkey(tkey->tfm_michael, key, 8); - crypto_digest_update(tkey->tfm_michael, sg, 2); - crypto_digest_final(tkey->tfm_michael, mic); + if (crypto_hash_setkey(tfm_michael, key, 8)) + return -1; - return 0; + desc.tfm = tfm_michael; + desc.flags = 0; + return crypto_hash_digest(&desc, sg, data_len + 16, mic); } static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) @@ -529,7 +583,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) if (stype & IEEE80211_STYPE_QOS_DATA) { const struct ieee80211_hdr_3addrqos *qoshdr = (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; + hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID); } else hdr[12] = 0; /* priority */ @@ -551,7 +605,7 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, michael_mic_hdr(skb, tkey->tx_hdr); pos = skb_put(skb, 8); - if (michael_mic(tkey, &tkey->key[16], tkey->tx_hdr, + if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) return -1; @@ -589,7 +643,7 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, return -1; michael_mic_hdr(skb, tkey->rx_hdr); - if (michael_mic(tkey, &tkey->key[24], tkey->rx_hdr, + if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { @@ -619,14 +673,18 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_tkip_data *tkey = priv; int keyidx; - struct crypto_tfm *tfm = tkey->tfm_michael; - struct crypto_tfm *tfm2 = tkey->tfm_arc4; + struct crypto_hash *tfm = tkey->tx_tfm_michael; + struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; + struct crypto_hash *tfm3 = tkey->rx_tfm_michael; + struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4; keyidx = tkey->key_idx; memset(tkey, 0, sizeof(*tkey)); tkey->key_idx = keyidx; - tkey->tfm_michael = tfm; - tkey->tfm_arc4 = tfm2; + tkey->tx_tfm_michael = tfm; + tkey->tx_tfm_arc4 = tfm2; + tkey->rx_tfm_michael = tfm3; + tkey->rx_tfm_arc4 = tfm4; if (len == TKIP_KEY_LEN) { memcpy(tkey->key, key, TKIP_KEY_LEN); tkey->key_set = 1; diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 649e581fa565..1b2efff11d39 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -9,7 +9,7 @@ * more details. */ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -33,26 +33,34 @@ struct prism2_wep_data { u8 key[WEP_KEY_LEN + 1]; u8 key_len; u8 key_idx; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tx_tfm; + struct crypto_blkcipher *rx_tfm; }; static void *prism2_wep_init(int keyidx) { struct prism2_wep_data *priv; - priv = kmalloc(sizeof(*priv), GFP_ATOMIC); + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); if (priv == NULL) goto fail; - memset(priv, 0, sizeof(*priv)); priv->key_idx = keyidx; - priv->tfm = crypto_alloc_tfm("arc4", 0); - if (priv->tfm == NULL) { + priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm)) { printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " "crypto API arc4\n"); + priv->tx_tfm = NULL; goto fail; } + priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm)) { + printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " + "crypto API arc4\n"); + priv->rx_tfm = NULL; + goto fail; + } /* start WEP IV from a random value */ get_random_bytes(&priv->iv, 4); @@ -60,8 +68,10 @@ static void *prism2_wep_init(int keyidx) fail: if (priv) { - if (priv->tfm) - crypto_free_tfm(priv->tfm); + if (priv->tx_tfm) + crypto_free_blkcipher(priv->tx_tfm); + if (priv->rx_tfm) + crypto_free_blkcipher(priv->rx_tfm); kfree(priv); } return NULL; @@ -70,8 +80,12 @@ static void *prism2_wep_init(int keyidx) static void prism2_wep_deinit(void *priv) { struct prism2_wep_data *_priv = priv; - if (_priv && _priv->tfm) - crypto_free_tfm(_priv->tfm); + if (_priv) { + if (_priv->tx_tfm) + crypto_free_blkcipher(_priv->tx_tfm); + if (_priv->rx_tfm) + crypto_free_blkcipher(_priv->rx_tfm); + } kfree(priv); } @@ -122,6 +136,7 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct prism2_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; u32 crc, klen, len; u8 *pos, *icv; struct scatterlist sg; @@ -153,13 +168,11 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[2] = crc >> 16; icv[3] = crc >> 24; - crypto_cipher_setkey(wep->tfm, key, klen); + crypto_blkcipher_setkey(wep->tx_tfm, key, klen); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = len + 4; - crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4); - - return 0; + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of @@ -172,6 +185,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct prism2_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; u32 crc, klen, plen; u8 key[WEP_KEY_LEN + 3]; u8 keyidx, *pos, icv[4]; @@ -196,11 +210,12 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) /* Apply RC4 to data and compute CRC32 over decrypted data */ plen = skb->len - hdr_len - 8; - crypto_cipher_setkey(wep->tfm, key, klen); + crypto_blkcipher_setkey(wep->rx_tfm, key, klen); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = plen + 4; - crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) + return -7; crc = ~crc32_le(~0, pos, plen); icv[0] = crc; diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c index 192243ab35ed..305a09de85a5 100644 --- a/net/ieee80211/ieee80211_geo.c +++ b/net/ieee80211/ieee80211_geo.c @@ -24,7 +24,6 @@ ******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 2cb84d84f671..13b1e5fff7e4 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -31,7 +31,6 @@ *******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 2bf567fd5a17..770704183a1b 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -14,7 +14,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> @@ -369,6 +368,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* Put this code here so that we avoid duplicating it in all * Rx paths. - Jean II */ +#ifdef CONFIG_WIRELESS_EXT #ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ /* If spy monitoring on */ if (ieee->spy_data.spy_number > 0) { @@ -397,15 +397,16 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, wireless_spy_update(ieee->dev, hdr->addr2, &wstats); } #endif /* IW_WIRELESS_SPY */ +#endif /* CONFIG_WIRELESS_EXT */ #ifdef NOT_YET hostap_update_rx_stats(local->ap, hdr, rx_stats); #endif if (ieee->iw_mode == IW_MODE_MONITOR) { - ieee80211_monitor_rx(ieee, skb, rx_stats); stats->rx_packets++; stats->rx_bytes += skb->len; + ieee80211_monitor_rx(ieee, skb, rx_stats); return 1; } @@ -778,33 +779,44 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, return 0; } -/* Filter out unrelated packets, call ieee80211_rx[_mgt] */ -int ieee80211_rx_any(struct ieee80211_device *ieee, +/* Filter out unrelated packets, call ieee80211_rx[_mgt] + * This function takes over the skb, it should not be used again after calling + * this function. */ +void ieee80211_rx_any(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_rx_stats *stats) { struct ieee80211_hdr_4addr *hdr; int is_packet_for_us; u16 fc; - if (ieee->iw_mode == IW_MODE_MONITOR) - return ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL; + if (ieee->iw_mode == IW_MODE_MONITOR) { + if (!ieee80211_rx(ieee, skb, stats)) + dev_kfree_skb_irq(skb); + return; + } + + if (skb->len < sizeof(struct ieee80211_hdr)) + goto drop_free; hdr = (struct ieee80211_hdr_4addr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); if ((fc & IEEE80211_FCTL_VERS) != 0) - return -EINVAL; + goto drop_free; switch (fc & IEEE80211_FCTL_FTYPE) { case IEEE80211_FTYPE_MGMT: + if (skb->len < sizeof(struct ieee80211_hdr_3addr)) + goto drop_free; ieee80211_rx_mgt(ieee, hdr, stats); - return 0; + dev_kfree_skb_irq(skb); + return; case IEEE80211_FTYPE_DATA: break; case IEEE80211_FTYPE_CTL: - return 0; + return; default: - return -EINVAL; + return; } is_packet_for_us = 0; @@ -848,8 +860,14 @@ int ieee80211_rx_any(struct ieee80211_device *ieee, } if (is_packet_for_us) - return (ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL); - return 0; + if (!ieee80211_rx(ieee, skb, stats)) + dev_kfree_skb_irq(skb); + return; + +drop_free: + dev_kfree_skb_irq(skb); + ieee->stats.rx_dropped++; + return; } #define MGMT_FRAME_FIXED_PART_LENGTH 0x24 @@ -1060,13 +1078,16 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element while (length >= sizeof(*info_element)) { if (sizeof(*info_element) + info_element->len > length) { - IEEE80211_DEBUG_MGMT("Info elem: parse failed: " - "info_element->len + 2 > left : " - "info_element->len+2=%zd left=%d, id=%d.\n", - info_element->len + - sizeof(*info_element), - length, info_element->id); - return 1; + IEEE80211_ERROR("Info elem: parse failed: " + "info_element->len + 2 > left : " + "info_element->len+2=%zd left=%d, id=%d.\n", + info_element->len + + sizeof(*info_element), + length, info_element->id); + /* We stop processing but don't return an error here + * because some misbehaviour APs break this rule. ie. + * Orinoco AP1000. */ + break; } switch (info_element->id) { @@ -1165,6 +1186,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element case MFIE_TYPE_ERP_INFO: network->erp_value = info_element->data[0]; + network->flags |= NETWORK_HAS_ERP_VALUE; IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", network->erp_value); break; @@ -1728,5 +1750,6 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, } } +EXPORT_SYMBOL_GPL(ieee80211_rx_any); EXPORT_SYMBOL(ieee80211_rx_mgt); EXPORT_SYMBOL(ieee80211_rx); diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index 6a5de1b84459..ae254497ba3d 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -24,7 +24,6 @@ ******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> @@ -338,7 +337,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) hdr_len += 2; skb->priority = ieee80211_classify(skb); - header.qos_ctl |= skb->priority & IEEE80211_QCTL_TID; + header.qos_ctl |= cpu_to_le16(skb->priority & IEEE80211_QCTL_TID); } header.frame_ctl = cpu_to_le16(fc); @@ -533,13 +532,6 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } - if (ret == NETDEV_TX_BUSY) { - printk(KERN_ERR "%s: NETDEV_TX_BUSY returned; " - "driver should report queue full via " - "ieee_device->is_queue_full.\n", - ieee->dev->name); - } - ieee80211_txb_free(txb); } @@ -563,10 +555,13 @@ int ieee80211_tx_frame(struct ieee80211_device *ieee, struct net_device_stats *stats = &ieee->stats; struct sk_buff *skb_frag; int priority = -1; + int fraglen = total_len; + int headroom = ieee->tx_headroom; + struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; spin_lock_irqsave(&ieee->lock, flags); - if (encrypt_mpdu && !ieee->sec.encrypt) + if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt)) encrypt_mpdu = 0; /* If there is no driver handler to take the TXB, dont' bother @@ -582,20 +577,24 @@ int ieee80211_tx_frame(struct ieee80211_device *ieee, goto success; } - if (encrypt_mpdu) + if (encrypt_mpdu) { frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + fraglen += crypt->ops->extra_mpdu_prefix_len + + crypt->ops->extra_mpdu_postfix_len; + headroom += crypt->ops->extra_mpdu_prefix_len; + } /* When we allocate the TXB we allocate enough space for the reserve * and full fragment bytes (bytes_per_frag doesn't include prefix, * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(1, total_len, ieee->tx_headroom, GFP_ATOMIC); + txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC); if (unlikely(!txb)) { printk(KERN_WARNING "%s: Could not allocate TXB\n", ieee->dev->name); goto failed; } txb->encrypted = 0; - txb->payload_size = total_len; + txb->payload_size = fraglen; skb_frag = txb->fragments[0]; diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index a78c4f845f66..5cb9cfd35397 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -369,11 +369,10 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, struct ieee80211_crypt_data *new_crypt; /* take WEP into use */ - new_crypt = kmalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) return -ENOMEM; - memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); new_crypt->ops = ieee80211_get_crypto_ops("WEP"); if (!new_crypt->ops) { request_module("ieee80211_crypt_wep"); @@ -616,13 +615,11 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, ieee80211_crypt_delayed_deinit(ieee, crypt); - new_crypt = (struct ieee80211_crypt_data *) - kmalloc(sizeof(*new_crypt), GFP_KERNEL); + new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL); if (new_crypt == NULL) { ret = -ENOMEM; goto done; } - memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); new_crypt->ops = ops; if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) new_crypt->priv = new_crypt->ops->init(idx); diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index 5e9a90651d04..589f6d2c548a 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -47,9 +47,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft dprintk(KERN_INFO PFX "sent association request!\n"); - /* Change the state to associating */ spin_lock_irqsave(&mac->lock, flags); - mac->associnfo.associating = 1; mac->associated = 0; /* just to make sure */ /* Set a timer for timeout */ @@ -63,6 +61,7 @@ void ieee80211softmac_assoc_timeout(void *d) { struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; + struct ieee80211softmac_network *n; unsigned long flags; spin_lock_irqsave(&mac->lock, flags); @@ -75,11 +74,12 @@ ieee80211softmac_assoc_timeout(void *d) mac->associnfo.associating = 0; mac->associnfo.bssvalid = 0; mac->associated = 0; + + n = ieee80211softmac_get_network_by_bssid_locked(mac, mac->associnfo.bssid); spin_unlock_irqrestore(&mac->lock, flags); dprintk(KERN_INFO PFX "assoc request timed out!\n"); - /* FIXME: we need to know the network here. that requires a bit of restructuring */ - ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, NULL); + ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, n); } void @@ -96,7 +96,7 @@ ieee80211softmac_disassoc(struct ieee80211softmac_device *mac) mac->associated = 0; mac->associnfo.bssvalid = 0; mac->associnfo.associating = 0; - ieee80211softmac_init_txrates(mac); + ieee80211softmac_init_bss(mac); ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL); spin_unlock_irqrestore(&mac->lock, flags); } @@ -203,6 +203,10 @@ ieee80211softmac_assoc_work(void *d) if (mac->associated) ieee80211softmac_send_disassoc_req(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT); + spin_lock_irqsave(&mac->lock, flags); + mac->associnfo.associating = 1; + spin_unlock_irqrestore(&mac->lock, flags); + /* try to find the requested network in our list, if we found one already */ if (bssvalid || mac->associnfo.bssfixed) found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid); @@ -295,19 +299,32 @@ ieee80211softmac_assoc_work(void *d) memcpy(mac->associnfo.associate_essid.data, found->essid.data, IW_ESSID_MAX_SIZE + 1); /* we found a network! authenticate (if necessary) and associate to it. */ - if (!found->authenticated) { + if (found->authenticating) { + dprintk(KERN_INFO PFX "Already requested authentication, waiting...\n"); + if(!mac->associnfo.assoc_wait) { + mac->associnfo.assoc_wait = 1; + ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); + } + return; + } + if (!found->authenticated && !found->authenticating) { /* This relies on the fact that _auth_req only queues the work, * otherwise adding the notification would be racy. */ if (!ieee80211softmac_auth_req(mac, found)) { - dprintk(KERN_INFO PFX "cannot associate without being authenticated, requested authentication\n"); - ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); + if(!mac->associnfo.assoc_wait) { + dprintk(KERN_INFO PFX "Cannot associate without being authenticated, requested authentication\n"); + mac->associnfo.assoc_wait = 1; + ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); + } } else { printkl(KERN_WARNING PFX "Not authenticated, but requesting authentication failed. Giving up to associate\n"); + mac->associnfo.assoc_wait = 0; ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, found); } return; } /* finally! now we can start associating */ + mac->associnfo.assoc_wait = 0; ieee80211softmac_assoc(mac, found); } @@ -317,11 +334,19 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, struct ieee80211_assoc_response * resp, struct ieee80211softmac_network *net) { + u16 cap = le16_to_cpu(resp->capability); + u8 erp_value = net->erp_value; + mac->associnfo.associating = 0; - mac->associnfo.supported_rates = net->supported_rates; + mac->bssinfo.supported_rates = net->supported_rates; ieee80211softmac_recalc_txrates(mac); mac->associated = 1; + + mac->associnfo.short_preamble_available = + (cap & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0; + ieee80211softmac_process_erp(mac, erp_value); + if (mac->set_bssid_filter) mac->set_bssid_filter(mac->dev, net->bssid); memcpy(mac->ieee->bssid, net->bssid, ETH_ALEN); @@ -334,9 +359,9 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, int ieee80211softmac_handle_assoc_response(struct net_device * dev, struct ieee80211_assoc_response * resp, - struct ieee80211_network * _ieee80211_network_do_not_use) + struct ieee80211_network * _ieee80211_network) { - /* NOTE: the network parameter has to be ignored by + /* NOTE: the network parameter has to be mostly ignored by * this code because it is the ieee80211's pointer * to the struct, not ours (we made a copy) */ @@ -368,6 +393,11 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, /* now that we know it was for us, we can cancel the timeout */ cancel_delayed_work(&mac->associnfo.timeout); + /* if the association response included an ERP IE, update our saved + * copy */ + if (_ieee80211_network->flags & NETWORK_HAS_ERP_VALUE) + network->erp_value = _ieee80211_network->erp_value; + switch (status) { case 0: dprintk(KERN_INFO PFX "associated!\n"); diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 90b8484e509b..4cef39e171d0 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -36,8 +36,9 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, struct ieee80211softmac_auth_queue_item *auth; unsigned long flags; - if (net->authenticating) + if (net->authenticating || net->authenticated) return 0; + net->authenticating = 1; /* Add the network if it's not already added */ ieee80211softmac_add_network(mac, net); @@ -92,7 +93,6 @@ ieee80211softmac_auth_queue(void *data) return; } net->authenticated = 0; - net->authenticating = 1; /* add a timeout call so we eventually give up waiting for an auth reply */ schedule_delayed_work(&auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); auth->retry--; @@ -116,6 +116,16 @@ ieee80211softmac_auth_queue(void *data) kfree(auth); } +/* Sends a response to an auth challenge (for shared key auth). */ +static void +ieee80211softmac_auth_challenge_response(void *_aq) +{ + struct ieee80211softmac_auth_queue_item *aq = _aq; + + /* Send our response */ + ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); +} + /* Handle the auth response from the AP * This should be registered with ieee80211 as handle_auth */ @@ -197,24 +207,30 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE: /* Check to make sure we have a challenge IE */ data = (u8 *)auth->info_element; - if(*data++ != MFIE_TYPE_CHALLENGE){ + if (*data++ != MFIE_TYPE_CHALLENGE) { printkl(KERN_NOTICE PFX "Shared Key Authentication failed due to a missing challenge.\n"); break; } /* Save the challenge */ spin_lock_irqsave(&mac->lock, flags); net->challenge_len = *data++; - if(net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) + if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) net->challenge_len = WLAN_AUTH_CHALLENGE_LEN; - if(net->challenge != NULL) + if (net->challenge != NULL) kfree(net->challenge); net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC); memcpy(net->challenge, data, net->challenge_len); aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE; - spin_unlock_irqrestore(&mac->lock, flags); - /* Send our response */ - ieee80211softmac_send_mgt_frame(mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); + /* We reuse the work struct from the auth request here. + * It is safe to do so as each one is per-request, and + * at this point (dealing with authentication response) + * we have obviously already sent the initial auth + * request. */ + cancel_delayed_work(&aq->work); + INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq); + schedule_work(&aq->work); + spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: kfree(net->challenge); diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c index 09541611e48c..82bfddbf33a2 100644 --- a/net/ieee80211/softmac/ieee80211softmac_io.c +++ b/net/ieee80211/softmac/ieee80211softmac_io.c @@ -96,8 +96,7 @@ ieee80211softmac_alloc_mgt(u32 size) if(size > IEEE80211_DATA_LEN) return NULL; /* Allocate the frame */ - data = kmalloc(size, GFP_ATOMIC); - memset(data, 0, size); + data = kzalloc(size, GFP_ATOMIC); return data; } @@ -229,6 +228,9 @@ ieee80211softmac_assoc_req(struct ieee80211_assoc_request **pkt, return 0; ieee80211softmac_hdr_3addr(mac, &((*pkt)->header), IEEE80211_STYPE_ASSOC_REQ, net->bssid, net->bssid); + /* Fill in the capabilities */ + (*pkt)->capability = ieee80211softmac_capabilities(mac, net); + /* Fill in Listen Interval (?) */ (*pkt)->listen_interval = cpu_to_le16(10); @@ -465,3 +467,17 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, kfree(pkt); return 0; } + +/* Beacon handling */ +int ieee80211softmac_handle_beacon(struct net_device *dev, + struct ieee80211_beacon *beacon, + struct ieee80211_network *network) +{ + struct ieee80211softmac_device *mac = ieee80211_priv(dev); + + if (mac->associated && memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0) + ieee80211softmac_process_erp(mac, network->erp_value); + + return 0; +} + diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 4b2e57d12418..addea1cf73ae 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -44,6 +44,7 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) softmac->ieee->handle_assoc_response = ieee80211softmac_handle_assoc_response; softmac->ieee->handle_reassoc_request = ieee80211softmac_handle_reassoc_req; softmac->ieee->handle_disassoc = ieee80211softmac_handle_disassoc; + softmac->ieee->handle_beacon = ieee80211softmac_handle_beacon; softmac->scaninfo = NULL; softmac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT; @@ -178,21 +179,14 @@ int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo return 0; } -/* Finds the highest rate which is: - * 1. Present in ri (optionally a basic rate) - * 2. Supported by the device - * 3. Less than or equal to the user-defined rate - */ -static u8 highest_supported_rate(struct ieee80211softmac_device *mac, +u8 ieee80211softmac_highest_supported_rate(struct ieee80211softmac_device *mac, struct ieee80211softmac_ratesinfo *ri, int basic_only) { u8 user_rate = mac->txrates.user_rate; int i; - if (ri->count == 0) { - dprintk(KERN_ERR PFX "empty ratesinfo?\n"); + if (ri->count == 0) return IEEE80211_CCK_RATE_1MB; - } for (i = ri->count - 1; i >= 0; i--) { u8 rate = ri->rates[i]; @@ -208,36 +202,61 @@ static u8 highest_supported_rate(struct ieee80211softmac_device *mac, /* If we haven't found a suitable rate by now, just trust the user */ return user_rate; } +EXPORT_SYMBOL_GPL(ieee80211softmac_highest_supported_rate); + +void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac, + u8 erp_value) +{ + int use_protection; + int short_preamble; + u32 changes = 0; + + /* Barker preamble mode */ + short_preamble = ((erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0 + && mac->associnfo.short_preamble_available) ? 1 : 0; + + /* Protection needed? */ + use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + + if (mac->bssinfo.short_preamble != short_preamble) { + changes |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE; + mac->bssinfo.short_preamble = short_preamble; + } + + if (mac->bssinfo.use_protection != use_protection) { + changes |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION; + mac->bssinfo.use_protection = use_protection; + } + + if (mac->bssinfo_change && changes) + mac->bssinfo_change(mac->dev, changes); +} void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac) { struct ieee80211softmac_txrates *txrates = &mac->txrates; - struct ieee80211softmac_txrates oldrates; u32 change = 0; - if (mac->txrates_change) - oldrates = mac->txrates; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; - txrates->default_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 0); + txrates->default_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 0); change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; txrates->default_fallback = lower_rate(mac, txrates->default_rate); change |= IEEE80211SOFTMAC_TXRATECHG_MCAST; - txrates->mcast_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 1); + txrates->mcast_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 1); if (mac->txrates_change) - mac->txrates_change(mac->dev, change, &oldrates); + mac->txrates_change(mac->dev, change); } -void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) +void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac) { struct ieee80211_device *ieee = mac->ieee; u32 change = 0; struct ieee80211softmac_txrates *txrates = &mac->txrates; - struct ieee80211softmac_txrates oldrates; + struct ieee80211softmac_bss_info *bssinfo = &mac->bssinfo; /* TODO: We need some kind of state machine to lower the default rates * if we loose too many packets. @@ -245,8 +264,6 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) /* Change the default txrate to the highest possible value. * The txrate machine will lower it, if it is too high. */ - if (mac->txrates_change) - oldrates = mac->txrates; /* FIXME: We don't correctly handle backing down to lower rates, so 801.11g devices start off at 11M for now. People can manually change it if they really need to, but 11M is @@ -272,7 +289,23 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) change |= IEEE80211SOFTMAC_TXRATECHG_MGT_MCAST; if (mac->txrates_change) - mac->txrates_change(mac->dev, change, &oldrates); + mac->txrates_change(mac->dev, change); + + change = 0; + + bssinfo->supported_rates.count = 0; + memset(bssinfo->supported_rates.rates, 0, + sizeof(bssinfo->supported_rates.rates)); + change |= IEEE80211SOFTMAC_BSSINFOCHG_RATES; + + bssinfo->short_preamble = 0; + change |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE; + + bssinfo->use_protection = 0; + change |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION; + + if (mac->bssinfo_change) + mac->bssinfo_change(mac->dev, change); mac->running = 1; } @@ -282,7 +315,7 @@ void ieee80211softmac_start(struct net_device *dev) struct ieee80211softmac_device *mac = ieee80211_priv(dev); ieee80211softmac_start_check_rates(mac); - ieee80211softmac_init_txrates(mac); + ieee80211softmac_init_bss(mac); } EXPORT_SYMBOL_GPL(ieee80211softmac_start); @@ -335,7 +368,6 @@ u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rat static void ieee80211softmac_add_txrates_badness(struct ieee80211softmac_device *mac, int amount) { - struct ieee80211softmac_txrates oldrates; u8 default_rate = mac->txrates.default_rate; u8 default_fallback = mac->txrates.default_fallback; u32 changes = 0; @@ -348,8 +380,6 @@ printk("badness %d\n", mac->txrate_badness); mac->txrate_badness += amount; if (mac->txrate_badness <= -1000) { /* Very small badness. Try a faster bitrate. */ - if (mac->txrates_change) - memcpy(&oldrates, &mac->txrates, sizeof(oldrates)); default_rate = raise_rate(mac, default_rate); changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; default_fallback = get_fallback_rate(mac, default_rate); @@ -358,8 +388,6 @@ printk("badness %d\n", mac->txrate_badness); printk("Bitrate raised to %u\n", default_rate); } else if (mac->txrate_badness >= 10000) { /* Very high badness. Try a slower bitrate. */ - if (mac->txrates_change) - memcpy(&oldrates, &mac->txrates, sizeof(oldrates)); default_rate = lower_rate(mac, default_rate); changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; default_fallback = get_fallback_rate(mac, default_rate); @@ -372,7 +400,7 @@ printk("Bitrate lowered to %u\n", default_rate); mac->txrates.default_fallback = default_fallback; if (changes && mac->txrates_change) - mac->txrates_change(mac->dev, changes, &oldrates); + mac->txrates_change(mac->dev, changes); } void ieee80211softmac_fragment_lost(struct net_device *dev, @@ -416,7 +444,11 @@ ieee80211softmac_create_network(struct ieee80211softmac_device *mac, memcpy(&softnet->supported_rates.rates[softnet->supported_rates.count], net->rates_ex, net->rates_ex_len); softnet->supported_rates.count += net->rates_ex_len; sort(softnet->supported_rates.rates, softnet->supported_rates.count, sizeof(softnet->supported_rates.rates[0]), rate_cmp, NULL); - + + /* we save the ERP value because it is needed at association time, and + * many AP's do not include an ERP IE in the association response. */ + softnet->erp_value = net->erp_value; + softnet->capabilities = net->capability; return softnet; } diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h index fa1f8e3acfc0..0642e090b8a7 100644 --- a/net/ieee80211/softmac/ieee80211softmac_priv.h +++ b/net/ieee80211/softmac/ieee80211softmac_priv.h @@ -116,9 +116,11 @@ ieee80211softmac_get_network_by_essid(struct ieee80211softmac_device *mac, struct ieee80211softmac_essid *essid); /* Rates related */ +void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac, + u8 erp_value); int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo *ri, u8 rate); u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rate, int delta); -void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac); +void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac); void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac); static inline u8 lower_rate(struct ieee80211softmac_device *mac, u8 rate) { return ieee80211softmac_lower_rate_delta(mac, rate, 1); @@ -133,6 +135,9 @@ static inline u8 get_fallback_rate(struct ieee80211softmac_device *mac, u8 rate) /*** prototypes from _io.c */ int ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, void* ptrarg, u32 type, u32 arg); +int ieee80211softmac_handle_beacon(struct net_device *dev, + struct ieee80211_beacon *beacon, + struct ieee80211_network *network); /*** prototypes from _auth.c */ /* do these have to go into the public header? */ @@ -189,6 +194,7 @@ struct ieee80211softmac_network { authenticated:1, auth_desynced_once:1; + u8 erp_value; /* Saved ERP value */ u16 capabilities; /* Capabilities bitfield */ u8 challenge_len; /* Auth Challenge length */ char *challenge; /* Challenge Text */ diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 0e65ff4e33fc..2aa779d18f38 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -70,15 +70,47 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, char *extra) { struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); + struct ieee80211softmac_network *n; + struct ieee80211softmac_auth_queue_item *authptr; int length = 0; unsigned long flags; - + + /* Check if we're already associating to this or another network + * If it's another network, cancel and start over with our new network + * If it's our network, ignore the change, we're already doing it! + */ + if((sm->associnfo.associating || sm->associated) && + (data->essid.flags && data->essid.length)) { + /* Get the associating network */ + n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid); + if(n && n->essid.len == data->essid.length && + !memcmp(n->essid.data, extra, n->essid.len)) { + dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n", + MAC_ARG(sm->associnfo.bssid)); + return 0; + } else { + dprintk(KERN_INFO PFX "Canceling existing associate request!\n"); + spin_lock_irqsave(&sm->lock,flags); + /* Cancel assoc work */ + cancel_delayed_work(&sm->associnfo.work); + /* We don't have to do this, but it's a little cleaner */ + list_for_each_entry(authptr, &sm->auth_queue, list) + cancel_delayed_work(&authptr->work); + sm->associnfo.bssvalid = 0; + sm->associnfo.bssfixed = 0; + spin_unlock_irqrestore(&sm->lock,flags); + flush_scheduled_work(); + } + } + + spin_lock_irqsave(&sm->lock, flags); - + sm->associnfo.static_essid = 0; + sm->associnfo.assoc_wait = 0; - if (data->essid.flags && data->essid.length && extra /*required?*/) { - length = min(data->essid.length - 1, IW_ESSID_MAX_SIZE); + if (data->essid.flags && data->essid.length) { + length = min((int)data->essid.length, IW_ESSID_MAX_SIZE); if (length) { memcpy(sm->associnfo.req_essid.data, extra, length); sm->associnfo.static_essid = 1; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index da33393be45f..30af4a4dfcc8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -88,6 +88,7 @@ config IP_FIB_HASH config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER + select FIB_RULES ---help--- Normally, a router decides what to do with a received packet based solely on the packet's final destination address. If you say Y here, @@ -386,6 +387,7 @@ config INET_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- @@ -446,24 +448,22 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG -config TCP_CONG_ADVANCED +menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- Support for selection of various TCP congestion control modules. Nearly all users can safely say no here, and a safe default - selection will be made (BIC-TCP with new Reno as a fallback). + selection will be made (CUBIC with new Reno as a fallback). If unsure, say N. -# TCP Reno is builtin (required as fallback) -menu "TCP congestion control" - depends on TCP_CONG_ADVANCED +if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" - default y + default m ---help--- BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and @@ -477,7 +477,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" - default m + default y ---help--- This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. @@ -572,22 +572,49 @@ config TCP_CONG_VENO loss packets. See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf -config TCP_CONG_COMPOUND - tristate "TCP Compound" - depends on EXPERIMENTAL - default n - ---help--- - TCP Compound is a sender-side only change to TCP that uses - a mixed Reno/Vegas approach to calculate the cwnd. - For further details look here: - ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf +choice + prompt "Default TCP congestion control" + default DEFAULT_CUBIC + help + Select the TCP congestion control that will be used by default + for all connections. -endmenu + config DEFAULT_BIC + bool "Bic" if TCP_CONG_BIC=y -config TCP_CONG_BIC + config DEFAULT_CUBIC + bool "Cubic" if TCP_CONG_CUBIC=y + + config DEFAULT_HTCP + bool "Htcp" if TCP_CONG_HTCP=y + + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + + config DEFAULT_WESTWOOD + bool "Westwood" if TCP_CONG_WESTWOOD=y + + config DEFAULT_RENO + bool "Reno" + +endchoice + +endif + +config TCP_CONG_CUBIC tristate depends on !TCP_CONG_ADVANCED default y +config DEFAULT_TCP_CONG + string + default "bic" if DEFAULT_BIC + default "cubic" if DEFAULT_CUBIC + default "htcp" if DEFAULT_HTCP + default "vegas" if DEFAULT_VEGAS + default "westwood" if DEFAULT_WESTWOOD + default "reno" if DEFAULT_RENO + default "cubic" + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 38b8039bdd55..f66049e28aeb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o -obj-$(CONFIG_TCP_CONG_COMPOUND) += tcp_compound.o +obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 461216b47948..edcf0932ac6d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -67,7 +67,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/types.h> @@ -392,7 +391,7 @@ int inet_release(struct socket *sock) } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind; +int sysctl_ip_nonlocal_bind __read_mostly; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p) * Shall we try to damage output packets if routing dev changes? */ -int sysctl_ip_dynaddr; +int sysctl_ip_dynaddr __read_mostly; static int inet_sk_reselect_saddr(struct sock *sk) { @@ -997,7 +996,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct rtable *rt; __u32 old_saddr = inet->saddr; __u32 new_saddr; - __u32 daddr = inet->daddr; + __be32 daddr = inet->daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; @@ -1044,7 +1043,7 @@ int inet_sk_rebuild_header(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; + __be32 daddr; int err; /* Route is OK, nothing to do. */ @@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) @@ -1097,7 +1097,41 @@ int inet_sk_rebuild_header(struct sock *sk) EXPORT_SYMBOL(inet_sk_rebuild_header); -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg) +static int inet_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct net_protocol *ops; + int proto; + int ihl; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (ihl < sizeof(*iph)) + goto out; + + if (unlikely(!pskb_may_pull(skb, ihl))) + goto out; + + skb->h.raw = __skb_pull(skb, ihl); + iph = skb->nh.iph; + proto = iph->protocol & (MAX_INET_PROTOS - 1); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (likely(ops && ops->gso_send_check)) + err = ops->gso_send_check(skb); + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; @@ -1106,7 +1140,15 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg) int ihl; int id; - if (!pskb_may_pull(skb, sizeof(*iph))) + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; iph = skb->nh.iph; @@ -1114,7 +1156,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg) if (ihl < sizeof(*iph)) goto out; - if (!pskb_may_pull(skb, ihl)) + if (unlikely(!pskb_may_pull(skb, ihl))) goto out; skb->h.raw = __skb_pull(skb, ihl); @@ -1125,11 +1167,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg) rcu_read_lock(); ops = rcu_dereference(inet_protos[proto]); - if (ops && ops->gso_segment) - segs = ops->gso_segment(skb, sg); + if (likely(ops && ops->gso_segment)) + segs = ops->gso_segment(skb, features); rcu_read_unlock(); - if (IS_ERR(segs)) + if (!segs || unlikely(IS_ERR(segs))) goto out; skb = segs; @@ -1154,6 +1196,7 @@ static struct net_protocol igmp_protocol = { static struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, + .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .no_policy = 1, }; @@ -1200,6 +1243,7 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type = { .type = __constant_htons(ETH_P_IP), .func = ip_rcv, + .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, }; @@ -1210,10 +1254,7 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - goto out; - } + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); rc = proto_register(&tcp_prot, 1); if (rc) @@ -1301,10 +1342,10 @@ static int __init inet_init(void) rc = 0; out: return rc; -out_unregister_tcp_proto: - proto_unregister(&tcp_prot); out_unregister_udp_proto: proto_unregister(&udp_prot); +out_unregister_tcp_proto: + proto_unregister(&tcp_prot); goto out; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c7782230080d..99542977e47e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,4 +1,4 @@ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case 0x86: /* Another "Commercial Security" crap. */ + case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; @@ -98,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -120,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; @@ -167,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); skb_push(skb, ihl); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { x->stats.integrity_failed++; goto out; } @@ -180,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) return 0; out: - return -EINVAL; + return err; } static void ah4_err(struct sk_buff *skb, u32 info) @@ -205,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -216,32 +224,33 @@ static int ah_init_state(struct xfrm_state *x) if (x->encap) goto error; - ahp = kmalloc(sizeof(*ahp), GFP_KERNEL); + ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); if (ahp == NULL) return -ENOMEM; - memset(ahp, 0, sizeof(*ahp)); - ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -256,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; @@ -265,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -280,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4749d504c629..cfe5c8474286 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -80,7 +80,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> @@ -235,7 +234,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev) static int arp_constructor(struct neighbour *neigh) { - u32 addr = *(u32*)neigh->primary_key; + __be32 addr = *(__be32*)neigh->primary_key; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; @@ -331,10 +330,10 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { - u32 saddr = 0; + __be32 saddr = 0; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; - u32 target = *(u32*)neigh->primary_key; + __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev = in_dev_get(dev); @@ -386,7 +385,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } static int arp_ignore(struct in_device *in_dev, struct net_device *dev, - u32 sip, u32 tip) + __be32 sip, __be32 tip) { int scope; @@ -421,7 +420,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev, return !inet_confirm_addr(dev, sip, tip, scope); } -static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) +static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } }; @@ -450,7 +449,7 @@ static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) * is allowed to use this function, it is scheduled to be removed. --ANK */ -static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct net_device * dev) +static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) { switch (addr_hint) { case RTN_LOCAL: @@ -471,7 +470,7 @@ static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, s int arp_find(unsigned char *haddr, struct sk_buff *skb) { struct net_device *dev = skb->dev; - u32 paddr; + __be32 paddr; struct neighbour *n; if (!skb->dst) { @@ -512,7 +511,7 @@ int arp_bind_neighbour(struct dst_entry *dst) if (dev == NULL) return -EINVAL; if (n == NULL) { - u32 nexthop = ((struct rtable*)dst)->rt_gateway; + __be32 nexthop = ((struct rtable*)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; n = __neigh_lookup_errno( @@ -561,8 +560,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) * Create an arp packet. If (dest_hw == NULL), we create a broadcast * message. */ -struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -676,8 +675,8 @@ void arp_xmit(struct sk_buff *skb) /* * Create and send an arp packet. */ -void arp_send(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +void arp_send(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -711,7 +710,7 @@ static int arp_process(struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha, *tha; - u32 sip, tip; + __be32 sip, tip; u16 dev_type = dev->type; int addr_type; struct neighbour *n; @@ -970,13 +969,13 @@ out_of_mem: static int arp_req_set(struct arpreq *r, struct net_device * dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err; if (r->arp_flags&ATF_PUBL) { - u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; - if (mask && mask != 0xFFFFFFFF) + __be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; + if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); @@ -1064,7 +1063,7 @@ static unsigned arp_state_to_flags(struct neighbour *neigh) static int arp_req_get(struct arpreq *r, struct net_device *dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err = -ENXIO; @@ -1085,13 +1084,13 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) static int arp_req_delete(struct arpreq *r, struct net_device * dev) { int err; - u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) { - u32 mask = + __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { @@ -1373,12 +1372,11 @@ static int arp_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; - memset(s, 0, sizeof(*s)); rc = seq_open(file, &arp_seq_ops); if (rc) goto out_kfree; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c new file mode 100644 index 000000000000..a8e2e879a647 --- /dev/null +++ b/net/ipv4/cipso_ipv4.c @@ -0,0 +1,1474 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/jhash.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> +#include <asm/bug.h> + +struct cipso_v4_domhsh_entry { + char *domain; + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* List of available DOI definitions */ +/* XXX - Updates should be minimal so having a single lock for the + * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be + * okay. */ +/* XXX - This currently assumes a minimal number of different DOIs in use, + * if in practice there are a lot of different DOIs this list should + * probably be turned into a hash table or something similar so we + * can do quick lookups. */ +static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); + +/* Label mapping cache */ +int cipso_v4_cache_enabled = 1; +int cipso_v4_cache_bucketsize = 10; +#define CIPSO_V4_CACHE_BUCKETBITS 7 +#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) +#define CIPSO_V4_CACHE_REORDERLIMIT 10 +struct cipso_v4_map_cache_bkt { + spinlock_t lock; + u32 size; + struct list_head list; +}; +struct cipso_v4_map_cache_entry { + u32 hash; + unsigned char *key; + size_t key_len; + + struct netlbl_lsm_cache lsm_data; + + u32 activity; + struct list_head list; +}; +static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; + +/* Restricted bitmap (tag #1) flags */ +int cipso_v4_rbm_optfmt = 0; +int cipso_v4_rbm_strictvalid = 1; + +/* + * Helper Functions + */ + +/** + * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit + * @bitmap: the bitmap + * @bitmap_len: length in bits + * @offset: starting offset + * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit + * + * Description: + * Starting at @offset, walk the bitmap from left to right until either the + * desired bit is found or we reach the end. Return the bit offset, -1 if + * not found, or -2 if error. + */ +static int cipso_v4_bitmap_walk(const unsigned char *bitmap, + u32 bitmap_len, + u32 offset, + u8 state) +{ + u32 bit_spot; + u32 byte_offset; + unsigned char bitmask; + unsigned char byte; + + /* gcc always rounds to zero when doing integer division */ + byte_offset = offset / 8; + byte = bitmap[byte_offset]; + bit_spot = offset; + bitmask = 0x80 >> (offset % 8); + + while (bit_spot < bitmap_len) { + if ((state && (byte & bitmask) == bitmask) || + (state == 0 && (byte & bitmask) == 0)) + return bit_spot; + + bit_spot++; + bitmask >>= 1; + if (bitmask == 0) { + byte = bitmap[++byte_offset]; + bitmask = 0x80; + } + } + + return -1; +} + +/** + * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap + * @bitmap: the bitmap + * @bit: the bit + * @state: if non-zero, set the bit (1) else clear the bit (0) + * + * Description: + * Set a single bit in the bitmask. Returns zero on success, negative values + * on error. + */ +static void cipso_v4_bitmap_setbit(unsigned char *bitmap, + u32 bit, + u8 state) +{ + u32 byte_spot; + u8 bitmask; + + /* gcc always rounds to zero when doing integer division */ + byte_spot = bit / 8; + bitmask = 0x80 >> (bit % 8); + if (state) + bitmap[byte_spot] |= bitmask; + else + bitmap[byte_spot] &= ~bitmask; +} + +/** + * cipso_v4_doi_domhsh_free - Frees a domain list entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a domain list entry can be released + * safely. + * + */ +static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) +{ + struct cipso_v4_domhsh_entry *ptr; + + ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * cipso_v4_cache_entry_free - Frees a cache entry + * @entry: the entry to free + * + * Description: + * This function frees the memory associated with a cache entry. + * + */ +static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) +{ + if (entry->lsm_data.free) + entry->lsm_data.free(entry->lsm_data.data); + kfree(entry->key); + kfree(entry); +} + +/** + * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache + * @key: the hash key + * @key_len: the length of the key in bytes + * + * Description: + * The CIPSO tag hashing function. Returns a 32-bit hash value. + * + */ +static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +/* + * Label Mapping Cache Functions + */ + +/** + * cipso_v4_cache_init - Initialize the CIPSO cache + * + * Description: + * Initializes the CIPSO label mapping cache, this function should be called + * before any of the other functions defined in this file. Returns zero on + * success, negative values on error. + * + */ +static int cipso_v4_cache_init(void) +{ + u32 iter; + + cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, + sizeof(struct cipso_v4_map_cache_bkt), + GFP_KERNEL); + if (cipso_v4_cache == NULL) + return -ENOMEM; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_init(&cipso_v4_cache[iter].lock); + cipso_v4_cache[iter].size = 0; + INIT_LIST_HEAD(&cipso_v4_cache[iter].list); + } + + return 0; +} + +/** + * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache + * + * Description: + * Invalidates and frees any entries in the CIPSO cache. Returns zero on + * success and negative values on failure. + * + */ +void cipso_v4_cache_invalidate(void) +{ + struct cipso_v4_map_cache_entry *entry, *tmp_entry; + u32 iter; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_bh(&cipso_v4_cache[iter].lock); + list_for_each_entry_safe(entry, + tmp_entry, + &cipso_v4_cache[iter].list, list) { + list_del(&entry->list); + cipso_v4_cache_entry_free(entry); + } + cipso_v4_cache[iter].size = 0; + spin_unlock_bh(&cipso_v4_cache[iter].lock); + } + + return; +} + +/** + * cipso_v4_cache_check - Check the CIPSO cache for a label mapping + * @key: the buffer to check + * @key_len: buffer length in bytes + * @secattr: the security attribute struct to use + * + * Description: + * This function checks the cache to see if a label mapping already exists for + * the given key. If there is a match then the cache is adjusted and the + * @secattr struct is populated with the correct LSM security attributes. The + * cache is adjusted in the following manner if the entry is not already the + * first in the cache bucket: + * + * 1. The cache entry's activity counter is incremented + * 2. The previous (higher ranking) entry's activity counter is decremented + * 3. If the difference between the two activity counters is geater than + * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped + * + * Returns zero on success, -ENOENT for a cache miss, and other negative values + * on error. + * + */ +static int cipso_v4_cache_check(const unsigned char *key, + u32 key_len, + struct netlbl_lsm_secattr *secattr) +{ + u32 bkt; + struct cipso_v4_map_cache_entry *entry; + struct cipso_v4_map_cache_entry *prev_entry = NULL; + u32 hash; + + if (!cipso_v4_cache_enabled) + return -ENOENT; + + hash = cipso_v4_map_cache_hash(key, key_len); + bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock_bh(&cipso_v4_cache[bkt].lock); + list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { + if (entry->hash == hash && + entry->key_len == key_len && + memcmp(entry->key, key, key_len) == 0) { + entry->activity += 1; + secattr->cache.free = entry->lsm_data.free; + secattr->cache.data = entry->lsm_data.data; + if (prev_entry == NULL) { + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + return 0; + } + + if (prev_entry->activity > 0) + prev_entry->activity -= 1; + if (entry->activity > prev_entry->activity && + entry->activity - prev_entry->activity > + CIPSO_V4_CACHE_REORDERLIMIT) { + __list_del(entry->list.prev, entry->list.next); + __list_add(&entry->list, + prev_entry->list.prev, + &prev_entry->list); + } + + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + return 0; + } + prev_entry = entry; + } + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + + return -ENOENT; +} + +/** + * cipso_v4_cache_add - Add an entry to the CIPSO cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add a new entry into the CIPSO label mapping cache. Add the new entry to + * head of the cache bucket's list, if the cache bucket is out of room remove + * the last entry in the list first. It is important to note that there is + * currently no checking for duplicate keys. Returns zero on success, + * negative values on failure. + * + */ +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 bkt; + struct cipso_v4_map_cache_entry *entry = NULL; + struct cipso_v4_map_cache_entry *old_entry = NULL; + unsigned char *cipso_ptr; + u32 cipso_ptr_len; + + if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + return 0; + + cipso_ptr = CIPSO_V4_OPTPTR(skb); + cipso_ptr_len = cipso_ptr[1]; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); + if (entry->key == NULL) { + ret_val = -ENOMEM; + goto cache_add_failure; + } + memcpy(entry->key, cipso_ptr, cipso_ptr_len); + entry->key_len = cipso_ptr_len; + entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); + entry->lsm_data.free = secattr->cache.free; + entry->lsm_data.data = secattr->cache.data; + + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock_bh(&cipso_v4_cache[bkt].lock); + if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache[bkt].size += 1; + } else { + old_entry = list_entry(cipso_v4_cache[bkt].list.prev, + struct cipso_v4_map_cache_entry, list); + list_del(&old_entry->list); + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache_entry_free(old_entry); + } + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + + return 0; + +cache_add_failure: + if (entry) + cipso_v4_cache_entry_free(entry); + return ret_val; +} + +/* + * DOI List Functions + */ + +/** + * cipso_v4_doi_search - Searches for a DOI definition + * @doi: the DOI to search for + * + * Description: + * Search the DOI definition list for a DOI definition with a DOI value that + * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * Returns a pointer to the DOI definition on success and NULL on failure. + */ +static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) +{ + struct cipso_v4_doi *iter; + + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->doi == doi && iter->valid) + return iter; + return NULL; +} + +/** + * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine + * @doi_def: the DOI structure + * + * Description: + * The caller defines a new DOI for use by the CIPSO engine and calls this + * function to add it to the list of acceptable domains. The caller must + * ensure that the mapping table specified in @doi_def->map meets all of the + * requirements of the mapping type (see cipso_ipv4.h for details). Returns + * zero on success and non-zero on failure. + * + */ +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) + return -EINVAL; + + doi_def->valid = 1; + INIT_RCU_HEAD(&doi_def->rcu); + INIT_LIST_HEAD(&doi_def->dom_list); + + rcu_read_lock(); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_rlock; + spin_lock(&cipso_v4_doi_list_lock); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_slock; + list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; + +doi_add_failure_slock: + spin_unlock(&cipso_v4_doi_list_lock); +doi_add_failure_rlock: + rcu_read_unlock(); + return -EEXIST; +} + +/** + * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine + * @doi: the DOI value + * @audit_secid: the LSM secid to use in the audit message + * @callback: the DOI cleanup/free callback + * + * Description: + * Removes a DOI definition from the CIPSO engine, @callback is called to + * free any memory. The NetLabel routines will be called to release their own + * LSM domain mappings as well as our own domain list. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_doi_remove(u32 doi, + struct netlbl_audit *audit_info, + void (*callback) (struct rcu_head * head)) +{ + struct cipso_v4_doi *doi_def; + struct cipso_v4_domhsh_entry *dom_iter; + + rcu_read_lock(); + if (cipso_v4_doi_search(doi) != NULL) { + spin_lock(&cipso_v4_doi_list_lock); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + return -ENOENT; + } + doi_def->valid = 0; + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) + if (dom_iter->valid) + netlbl_domhsh_remove(dom_iter->domain, + audit_info); + cipso_v4_cache_invalidate(); + rcu_read_unlock(); + + call_rcu(&doi_def->rcu, callback); + return 0; + } + rcu_read_unlock(); + + return -ENOENT; +} + +/** + * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * @doi: the DOI value + * + * Description: + * Searches for a valid DOI definition and if one is found it is returned to + * the caller. Otherwise NULL is returned. The caller must ensure that + * rcu_read_lock() is held while accessing the returned definition. + * + */ +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return cipso_v4_doi_search(doi); +} + +/** + * cipso_v4_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function + * + * Description: + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) +{ + int ret_val = -ENOENT; + u32 doi_cnt = 0; + struct cipso_v4_doi *iter_doi; + + rcu_read_lock(); + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; + } + } + +doi_walk_return: + rcu_read_unlock(); + *skip_cnt = doi_cnt; + return ret_val; +} + +/** + * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to add + * + * Description: + * Adds the @domain to the the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). This function + * does allocate memory. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + struct cipso_v4_domhsh_entry *new_dom; + + new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); + if (new_dom == NULL) + return -ENOMEM; + if (domain) { + new_dom->domain = kstrdup(domain, GFP_KERNEL); + if (new_dom->domain == NULL) { + kfree(new_dom); + return -ENOMEM; + } + } + new_dom->valid = 1; + INIT_RCU_HEAD(&new_dom->rcu); + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + kfree(new_dom->domain); + kfree(new_dom); + return -EEXIST; + } + list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to remove + * + * Description: + * Removes the @domain from the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). Returns zero + * on success and negative values on error. + * + */ +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + iter->valid = 0; + list_del_rcu(&iter->list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); + + return 0; + } + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return -ENOENT; +} + +/* + * Label Mapping Functions + */ + +/** + * cipso_v4_map_lvl_valid - Checks to see if the given level is understood + * @doi_def: the DOI definition + * @level: the level to check + * + * Description: + * Checks the given level against the given DOI definition and returns a + * negative value if the level does not have a valid mapping and a zero value + * if the level is defined by the DOI. + * + */ +static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network + * @doi_def: the DOI definition + * @host_lvl: the host MLS level + * @net_lvl: the network/CIPSO MLS level + * + * Description: + * Perform a label mapping to translate a local MLS level to the correct + * CIPSO level using the given DOI definition. Returns zero on success, + * negative values otherwise. + * + */ +static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, + u32 host_lvl, + u32 *net_lvl) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *net_lvl = host_lvl; + return 0; + case CIPSO_V4_MAP_STD: + if (host_lvl < doi_def->map.std->lvl.local_size) { + *net_lvl = doi_def->map.std->lvl.local[host_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host + * @doi_def: the DOI definition + * @net_lvl: the network/CIPSO MLS level + * @host_lvl: the host MLS level + * + * Description: + * Perform a label mapping to translate a CIPSO level to the correct local MLS + * level using the given DOI definition. Returns zero on success, negative + * values otherwise. + * + */ +static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, + u32 net_lvl, + u32 *host_lvl) +{ + struct cipso_v4_std_map_tbl *map_tbl; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *host_lvl = net_lvl; + return 0; + case CIPSO_V4_MAP_STD: + map_tbl = doi_def->map.std; + if (net_lvl < map_tbl->lvl.cipso_size && + map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { + *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid + * @doi_def: the DOI definition + * @bitmap: category bitmap + * @bitmap_len: bitmap length in bytes + * + * Description: + * Checks the given category bitmap against the given DOI definition and + * returns a negative value if any of the categories in the bitmap do not have + * a valid mapping and a zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *bitmap, + u32 bitmap_len) +{ + int cat = -1; + u32 bitmap_len_bits = bitmap_len * 8; + u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; + u32 *cipso_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + for (;;) { + cat = cipso_v4_bitmap_walk(bitmap, + bitmap_len_bits, + cat + 1, + 1); + if (cat < 0) + break; + if (cat >= cipso_cat_size || + cipso_array[cat] >= CIPSO_V4_INV_CAT) + return -EFAULT; + } + + if (cat == -1) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @host_cat: the category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * @net_cat: the zero'd out category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO bitmap using the given DOI definition. Returns the minimum + * size in bytes of the network bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, + const unsigned char *host_cat, + u32 host_cat_len, + unsigned char *net_cat, + u32 net_cat_len) +{ + int host_spot = -1; + u32 net_spot; + u32 net_spot_max = 0; + u32 host_clen_bits = host_cat_len * 8; + u32 net_clen_bits = net_cat_len * 8; + u32 host_cat_size = doi_def->map.std->cat.local_size; + u32 *host_cat_array = doi_def->map.std->cat.local; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + net_spot_max = host_cat_len - 1; + while (net_spot_max > 0 && host_cat[net_spot_max] == 0) + net_spot_max--; + if (net_spot_max > net_cat_len) + return -EINVAL; + memcpy(net_cat, host_cat, net_spot_max); + return net_spot_max; + case CIPSO_V4_MAP_STD: + for (;;) { + host_spot = cipso_v4_bitmap_walk(host_cat, + host_clen_bits, + host_spot + 1, + 1); + if (host_spot < 0) + break; + if (host_spot >= host_cat_size) + return -EPERM; + + net_spot = host_cat_array[host_spot]; + if (net_spot >= net_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(net_cat, net_spot, 1); + + if (net_spot > net_spot_max) + net_spot_max = net_spot; + } + + if (host_spot == -2) + return -EFAULT; + + if (++net_spot_max % 8) + return net_spot_max / 8 + 1; + return net_spot_max / 8; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @host_cat: the zero'd out category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * + * Description: + * Perform a label mapping to translate a CIPSO bitmap to the correct local + * MLS category bitmap using the given DOI definition. Returns the minimum + * size in bytes of the host bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + unsigned char *host_cat, + u32 host_cat_len) +{ + u32 host_spot; + u32 host_spot_max = 0; + int net_spot = -1; + u32 net_clen_bits = net_cat_len * 8; + u32 host_clen_bits = host_cat_len * 8; + u32 net_cat_size = doi_def->map.std->cat.cipso_size; + u32 *net_cat_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + if (net_cat_len > host_cat_len) + return -EINVAL; + memcpy(host_cat, net_cat, net_cat_len); + return net_cat_len; + case CIPSO_V4_MAP_STD: + for (;;) { + net_spot = cipso_v4_bitmap_walk(net_cat, + net_clen_bits, + net_spot + 1, + 1); + if (net_spot < 0) + break; + if (net_spot >= net_cat_size || + net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) + return -EPERM; + + host_spot = net_cat_array[net_spot]; + if (host_spot >= host_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(host_cat, host_spot, 1); + + if (host_spot > host_spot_max) + host_spot_max = host_spot; + } + + if (net_spot == -2) + return -EFAULT; + + if (++host_spot_max % 8) + return host_spot_max / 8 + 1; + return host_spot_max / 8; + } + + return -EINVAL; +} + +/* + * Protocol Handling Functions + */ + +#define CIPSO_V4_HDR_LEN 6 + +/** + * cipso_v4_gentag_hdr - Generate a CIPSO option header + * @doi_def: the DOI definition + * @len: the total tag length in bytes + * @buf: the CIPSO option buffer + * + * Description: + * Write a CIPSO header into the beginning of @buffer. Return zero on success, + * negative values on failure. + * + */ +static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, + u32 len, + unsigned char *buf) +{ + if (CIPSO_V4_HDR_LEN + len > 40) + return -ENOSPC; + + buf[0] = IPOPT_CIPSO; + buf[1] = CIPSO_V4_HDR_LEN + len; + *(u32 *)&buf[2] = htonl(doi_def->doi); + + return 0; +} + +#define CIPSO_V4_TAG1_CAT_LEN 30 + +/** + * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The + * actual buffer length may be larger than the indicated size due to + * translation between host and network category bitmaps. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char **buffer, + u32 *buffer_len) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 level; + + if (secattr->mls_cat) { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, + GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_hton(doi_def, + secattr->mls_cat, + secattr->mls_cat_len, + &buf[CIPSO_V4_HDR_LEN + 4], + CIPSO_V4_TAG1_CAT_LEN); + if (ret_val < 0) + goto gentag_failure; + + /* This will send packets using the "optimized" format when + * possibile as specified in section 3.4.2.6 of the + * CIPSO draft. */ + if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) + ret_val = 10; + + buf_len = 4 + ret_val; + } else { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + buf_len = 4; + } + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + goto gentag_failure; + + ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); + if (ret_val != 0) + goto gentag_failure; + + buf[CIPSO_V4_HDR_LEN] = 0x01; + buf[CIPSO_V4_HDR_LEN + 1] = buf_len; + buf[CIPSO_V4_HDR_LEN + 3] = level; + + *buffer = buf; + *buffer_len = CIPSO_V4_HDR_LEN + buf_len; + + return 0; + +gentag_failure: + kfree(buf); + return ret_val; +} + +/** + * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; + + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); + if (ret_val != 0) + return ret_val; + secattr->mls_lvl = level; + secattr->mls_lvl_vld = 1; + + if (tag_len > 4) { + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + secattr->mls_cat_len = tag_len - 4; + break; + case CIPSO_V4_MAP_STD: + secattr->mls_cat_len = + doi_def->map.std->cat.local_size; + break; + } + secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); + if (secattr->mls_cat == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr->mls_cat, + secattr->mls_cat_len); + if (ret_val < 0) { + kfree(secattr->mls_cat); + return ret_val; + } + secattr->mls_cat_len = ret_val; + } + + return 0; +} + +/** + * cipso_v4_validate - Validate a CIPSO option + * @option: the start of the option, on error it is set to point to the error + * + * Description: + * This routine is called to validate a CIPSO option, it checks all of the + * fields to ensure that they are at least valid, see the draft snippet below + * for details. If the option is valid then a zero value is returned and + * the value of @option is unchanged. If the option is invalid then a + * non-zero value is returned and @option is adjusted to point to the + * offending portion of the option. From the IETF draft ... + * + * "If any field within the CIPSO options, such as the DOI identifier, is not + * recognized the IP datagram is discarded and an ICMP 'parameter problem' + * (type 12) is generated and returned. The ICMP code field is set to 'bad + * parameter' (code 0) and the pointer is set to the start of the CIPSO field + * that is unrecognized." + * + */ +int cipso_v4_validate(unsigned char **option) +{ + unsigned char *opt = *option; + unsigned char *tag; + unsigned char opt_iter; + unsigned char err_offset = 0; + u8 opt_len; + u8 tag_len; + struct cipso_v4_doi *doi_def = NULL; + u32 tag_iter; + + /* caller already checks for length values that are too large */ + opt_len = opt[1]; + if (opt_len < 8) { + err_offset = 1; + goto validate_return; + } + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); + if (doi_def == NULL) { + err_offset = 2; + goto validate_return_locked; + } + + opt_iter = 6; + tag = opt + opt_iter; + while (opt_iter < opt_len) { + for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) + if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || + ++tag_iter == CIPSO_V4_TAG_MAXCNT) { + err_offset = opt_iter; + goto validate_return_locked; + } + + tag_len = tag[1]; + if (tag_len > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + switch (tag[0]) { + case CIPSO_V4_TAG_RBITMAP: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + /* We are already going to do all the verification + * necessary at the socket layer so from our point of + * view it is safe to turn these checks off (and less + * work), however, the CIPSO draft says we should do + * all the CIPSO validations here but it doesn't + * really specify _exactly_ what we need to validate + * ... so, just make it a sysctl tunable. */ + if (cipso_v4_rbm_strictvalid) { + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_rbm_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + } + break; + default: + err_offset = opt_iter; + goto validate_return_locked; + } + + tag += tag_len; + opt_iter += tag_len; + } + +validate_return_locked: + rcu_read_unlock(); +validate_return: + *option = opt + err_offset; + return err_offset; +} + +/** + * cipso_v4_error - Send the correct reponse for a bad packet + * @skb: the packet + * @error: the error code + * @gateway: CIPSO gateway flag + * + * Description: + * Based on the error code given in @error, send an ICMP error message back to + * the originating host. From the IETF draft ... + * + * "If the contents of the CIPSO [option] are valid but the security label is + * outside of the configured host or port label range, the datagram is + * discarded and an ICMP 'destination unreachable' (type 3) is generated and + * returned. The code field of the ICMP is set to 'communication with + * destination network administratively prohibited' (code 9) or to + * 'communication with destination host administratively prohibited' + * (code 10). The value of the code is dependent on whether the originator + * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The + * recipient of the ICMP message MUST be able to handle either value. The + * same procedure is performed if a CIPSO [option] can not be added to an + * IP packet because it is too large to fit in the IP options area." + * + * "If the error is triggered by receipt of an ICMP message, the message is + * discarded and no response is permitted (consistent with general ICMP + * processing rules)." + * + */ +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) +{ + if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) + return; + + if (gateway) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); +} + +/** + * cipso_v4_socket_setattr - Add a CIPSO option to a socket + * @sock: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sock->sk, which means it either needs to be in the + * process of being created or locked via lock_sock(sock->sk). Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 iter; + unsigned char *buf = NULL; + u32 buf_len = 0; + u32 opt_len; + struct ip_options *opt = NULL; + struct sock *sk; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + sk = sock->sk; + if (sk == NULL) + return 0; + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ + iter = 0; + do { + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_gentag_rbm(doi_def, + secattr, + &buf, + &buf_len); + break; + default: + ret_val = -EPERM; + goto socket_setattr_failure; + } + + iter++; + } while (ret_val != 0 && + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); + if (ret_val != 0) + goto socket_setattr_failure; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we can't block here. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->is_data = 1; + kfree(buf); + buf = NULL; + ret_val = ip_options_compile(opt, NULL); + if (ret_val != 0) + goto socket_setattr_failure; + + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += opt->optlen; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + opt = xchg(&sk_inet->opt, opt); + kfree(opt); + + return 0; + +socket_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. + * + */ +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + struct inet_sock *sk_inet; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + sk_inet = inet_sk(sk); + if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) + return -ENOMSG; + cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - + sizeof(struct iphdr); + ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); + if (ret_val == 0) + return ret_val; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + rcu_read_unlock(); + return -ENOMSG; + } + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + rcu_read_unlock(); + + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + + return ret_val; +} + +/** + * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Parse the given packet's CIPSO option and return the security attributes. + * Returns zero on success and negative values on failure. + * + */ +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + if (!CIPSO_V4_OPTEXIST(skb)) + return -ENOMSG; + cipso_ptr = CIPSO_V4_OPTPTR(skb); + if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) + return 0; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) + goto skbuff_getattr_return; + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + +skbuff_getattr_return: + rcu_read_unlock(); + return ret_val; +} + +/* + * Setup Functions + */ + +/** + * cipso_v4_init - Initialize the CIPSO module + * + * Description: + * Initialize the CIPSO module and prepare it for use. Returns zero on success + * and negative values on failure. + * + */ +static int __init cipso_v4_init(void) +{ + int ret_val; + + ret_val = cipso_v4_cache_init(); + if (ret_val != 0) + panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", + ret_val); + + return 0; +} + +subsys_initcall(cipso_v4_init); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index c1b42b5257f8..7b068a891953 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/module.h> #include <linux/ip.h> @@ -26,7 +25,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; - u32 saddr; + __be32 saddr; int oif; int err; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 54419b27686f..7602c79a389b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -27,7 +27,6 @@ * if no match found. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -44,6 +43,7 @@ #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -63,6 +63,7 @@ #include <net/ip.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/netlink.h> struct ipv4_devconf ipv4_devconf = { .accept_redirects = 1, @@ -79,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = { .accept_source_route = 1, }; -static void rtmsg_ifa(int event, struct in_ifaddr *); +static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { + [IFA_LOCAL] = { .type = NLA_U32 }, + [IFA_ADDRESS] = { .type = NLA_U32 }, + [IFA_BROADCAST] = { .type = NLA_U32 }, + [IFA_ANYCAST] = { .type = NLA_U32 }, + [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, +}; + +static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, @@ -94,10 +103,9 @@ static void devinet_sysctl_unregister(struct ipv4_devconf *p); static struct in_ifaddr *inet_alloc_ifa(void) { - struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); + struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); if (ifa) { - memset(ifa, 0, sizeof(*ifa)); INIT_RCU_HEAD(&ifa->rcu_head); } @@ -141,10 +149,9 @@ struct in_device *inetdev_init(struct net_device *dev) ASSERT_RTNL(); - in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); + in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; - memset(in_dev, 0, sizeof(*in_dev)); INIT_RCU_HEAD(&in_dev->rcu_head); memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; @@ -217,7 +224,7 @@ static void inetdev_destroy(struct in_device *in_dev) call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } -int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) +int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { rcu_read_lock(); for_primary_ifa(in_dev) { @@ -232,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) return 0; } -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy) +static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy, struct nlmsghdr *nlh, u32 pid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -266,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, if (!do_promote) { *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -291,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -303,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { @@ -322,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } -static int inet_insert_ifa(struct in_ifaddr *ifa) +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) +{ + __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); +} + +static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, + u32 pid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -367,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } +static int inet_insert_ifa(struct in_ifaddr *ifa) +{ + return __inet_insert_ifa(ifa, NULL, 0); +} + static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl(dev); @@ -410,8 +429,8 @@ struct in_device *inetdev_by_index(int ifindex) /* Called only from RTNL semaphored context. No locks. */ -struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, - u32 mask) +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, + __be32 mask) { ASSERT_RTNL(); @@ -424,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; struct in_ifaddr *ifa, **ifap; + int err = -EINVAL; ASSERT_RTNL(); - if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) - goto out; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; + + ifm = nlmsg_data(nlh); + in_dev = inetdev_by_index(ifm->ifa_index); + if (in_dev == NULL) { + err = -ENODEV; + goto errout; + } + __in_dev_put(in_dev); for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if ((rta[IFA_LOCAL - 1] && - memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), - &ifa->ifa_local, 4)) || - (rta[IFA_LABEL - 1] && - rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || - (rta[IFA_ADDRESS - 1] && - (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), - ifa)))) + if (tb[IFA_LOCAL] && + ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) + continue; + + if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) + continue; + + if (tb[IFA_ADDRESS] && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - inet_del_ifa(in_dev, ifap, 1); + + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); return 0; } -out: - return -EADDRNOTAVAIL; + + err = -EADDRNOTAVAIL; +errout: + return err; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; + struct in_ifaddr *ifa; + struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); - struct in_ifaddr *ifa; - int rc = -EINVAL; + int err = -EINVAL; - ASSERT_RTNL(); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; - if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) - goto out; + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) + goto errout; - rc = -ENODEV; - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) - goto out; + dev = __dev_get_by_index(ifm->ifa_index); + if (dev == NULL) { + err = -ENODEV; + goto errout; + } - rc = -ENOBUFS; - if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) { in_dev = inetdev_init(dev); - if (!in_dev) - goto out; + if (in_dev == NULL) { + err = -ENOBUFS; + goto errout; + } } - if ((ifa = inet_alloc_ifa()) == NULL) - goto out; + ifa = inet_alloc_ifa(); + if (ifa == NULL) { + /* + * A potential indev allocation can be left alive, it stays + * assigned to its device and is destroy with it. + */ + err = -ENOBUFS; + goto errout; + } + + in_dev_hold(in_dev); + + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - if (!rta[IFA_ADDRESS - 1]) - rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; - memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); - memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - if (rta[IFA_BROADCAST - 1]) - memcpy(&ifa->ifa_broadcast, - RTA_DATA(rta[IFA_BROADCAST - 1]), 4); - if (rta[IFA_ANYCAST - 1]) - memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; - in_dev_hold(in_dev); - ifa->ifa_dev = in_dev; - if (rta[IFA_LABEL - 1]) - rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); + ifa->ifa_dev = in_dev; + + ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); + + if (tb[IFA_BROADCAST]) + ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); + + if (tb[IFA_ANYCAST]) + ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); + + if (tb[IFA_LABEL]) + nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - rc = inet_insert_ifa(ifa); -out: - return rc; + return ifa; + +errout: + return ERR_PTR(err); +} + +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct in_ifaddr *ifa; + + ASSERT_RTNL(); + + ifa = rtm_to_ifaddr(nlh); + if (IS_ERR(ifa)) + return PTR_ERR(ifa); + + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); } /* @@ -739,7 +805,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { - u32 old_mask = ifa->ifa_mask; + __be32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -810,9 +876,9 @@ out: return done; } -u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) +__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; rcu_read_lock(); @@ -861,11 +927,11 @@ out: return addr; } -static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, - u32 local, int scope) +static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, + __be32 local, int scope) { int same = 0; - u32 addr = 0; + __be32 addr = 0; for_ifa(in_dev) { if (!addr && @@ -905,9 +971,9 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope) +__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; if (dev) { @@ -1059,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (ifa->ifa_address) - RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); + NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); + if (ifa->ifa_local) - RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); + NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); + if (ifa->ifa_broadcast) - RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); + NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); + if (ifa->ifa_anycast) - RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); + NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); + if (ifa->ifa_label[0]) - RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1130,19 +1201,27 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa) +static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, + u32 pid) { - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); - else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { + err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); + goto errout; } + + err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { @@ -1154,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, - [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, #endif }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 9bbdd4494551..13b29360d102 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,4 +1,4 @@ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -17,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct iphdr *top_iph; struct ip_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -37,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -92,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -104,26 +112,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } ip_send_check(top_iph); - err = 0; - error: return err; } @@ -138,8 +147,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph; struct ip_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -147,6 +158,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr[2]; struct scatterlist *sg; int padlen; + int err; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) goto out; @@ -156,15 +168,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; - - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + u8 sum[alen]; + + err = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (err) + goto out; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; goto out; } @@ -179,7 +192,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); sg = &esp->sgbuf[0]; @@ -189,9 +202,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(err)) + return err; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -238,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -255,9 +270,9 @@ out: static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -294,11 +309,11 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -308,6 +323,7 @@ static void esp_destroy(struct xfrm_state *x) static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -317,30 +333,33 @@ static int esp_init_state(struct xfrm_state *x) if (x->ealg == NULL) goto error; - esp = kmalloc(sizeof(*esp), GFP_KERNEL); + esp = kzalloc(sizeof(*esp), GFP_KERNEL); if (esp == NULL) return -ENOMEM; - memset(esp, 0, sizeof(*esp)); - if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { + crypto_hash_digestsize(hash)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), + crypto_hash_digestsize(hash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -354,24 +373,22 @@ static int esp_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 31387abf53a2..9c399a70dd5d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -33,10 +32,12 @@ #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/init.h> +#include <linux/list.h> #include <net/ip.h> #include <net/protocol.h> @@ -51,48 +52,67 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -#define RT_TABLE_MIN RT_TABLE_MAIN - struct fib_table *ip_fib_local_table; struct fib_table *ip_fib_main_table; -#else +#define FIB_TABLE_HASHSZ 1 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -#define RT_TABLE_MIN 1 +#else -struct fib_table *fib_tables[RT_TABLE_MAX+1]; +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *__fib_new_table(int id) +struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + tb = fib_get_table(id); + if (tb) + return tb; tb = fib_hash_init(id); if (!tb) return NULL; - fib_tables[id] = tb; + h = id & (FIB_TABLE_HASHSZ - 1); + hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); return tb; } +struct fib_table *fib_get_table(u32 id) +{ + struct fib_table *tb; + struct hlist_node *node; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + if (tb->tb_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + return NULL; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ - static void fib_flush(void) { int flushed = 0; -#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - int id; + struct hlist_node *node; + unsigned int h; - for (id = RT_TABLE_MAX; id>0; id--) { - if ((tb = fib_get_table(id))==NULL) - continue; - flushed += tb->tb_flush(tb); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + flushed += tb->tb_flush(tb); } -#else /* CONFIG_IP_MULTIPLE_TABLES */ - flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); - flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); -#endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) rt_cache_flush(-1); @@ -102,7 +122,7 @@ static void fib_flush(void) * Find the first device with a given source address. */ -struct net_device * ip_dev_find(u32 addr) +struct net_device * ip_dev_find(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; @@ -126,7 +146,7 @@ out: return dev; } -unsigned inet_addr_type(u32 addr) +unsigned inet_addr_type(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; @@ -160,8 +180,8 @@ unsigned inet_addr_type(u32 addr) - check, that packet arrived from expected physical interface. */ -int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, - struct net_device *dev, u32 *spec_dst, u32 *itag) +int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, + struct net_device *dev, __be32 *spec_dst, u32 *itag) { struct in_device *in_dev; struct flowi fl = { .nl_u = { .ip4_u = @@ -233,42 +253,190 @@ e_inval: #ifndef CONFIG_IP_NOSIOCRT +static inline __be32 sk_extract_addr(struct sockaddr *addr) +{ + return ((struct sockaddr_in *) addr)->sin_addr.s_addr; +} + +static int put_rtax(struct nlattr *mx, int len, int type, u32 value) +{ + struct nlattr *nla; + + nla = (struct nlattr *) ((char *) mx + len); + nla->nla_type = type; + nla->nla_len = nla_attr_size(4); + *(u32 *) nla_data(nla) = value; + + return len + nla_total_size(4); +} + +static int rtentry_to_fib_config(int cmd, struct rtentry *rt, + struct fib_config *cfg) +{ + __be32 addr; + int plen; + + memset(cfg, 0, sizeof(*cfg)); + + if (rt->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Check mask for validity: + * a) it must be contiguous. + * b) destination must have all host bits clear. + * c) if application forgot to set correct family (AF_INET), + * reject request unless it is absolutely clear i.e. + * both family and mask are zero. + */ + plen = 32; + addr = sk_extract_addr(&rt->rt_dst); + if (!(rt->rt_flags & RTF_HOST)) { + __be32 mask = sk_extract_addr(&rt->rt_genmask); + + if (rt->rt_genmask.sa_family != AF_INET) { + if (mask || rt->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + + if (bad_mask(mask, addr)) + return -EINVAL; + + plen = inet_mask_len(mask); + } + + cfg->fc_dst_len = plen; + cfg->fc_dst = addr; + + if (cmd != SIOCDELRT) { + cfg->fc_nlflags = NLM_F_CREATE; + cfg->fc_protocol = RTPROT_BOOT; + } + + if (rt->rt_metric) + cfg->fc_priority = rt->rt_metric - 1; + + if (rt->rt_flags & RTF_REJECT) { + cfg->fc_scope = RT_SCOPE_HOST; + cfg->fc_type = RTN_UNREACHABLE; + return 0; + } + + cfg->fc_scope = RT_SCOPE_NOWHERE; + cfg->fc_type = RTN_UNICAST; + + if (rt->rt_dev) { + char *colon; + struct net_device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + + devname[IFNAMSIZ-1] = 0; + colon = strchr(devname, ':'); + if (colon) + *colon = 0; + dev = __dev_get_by_name(devname); + if (!dev) + return -ENODEV; + cfg->fc_oif = dev->ifindex; + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + cfg->fc_prefsrc = ifa->ifa_local; + } + } + + addr = sk_extract_addr(&rt->rt_gateway); + if (rt->rt_gateway.sa_family == AF_INET && addr) { + cfg->fc_gw = addr; + if (rt->rt_flags & RTF_GATEWAY && + inet_addr_type(addr) == RTN_UNICAST) + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + return -EINVAL; + + if (cfg->fc_scope == RT_SCOPE_NOWHERE) + cfg->fc_scope = RT_SCOPE_LINK; + + if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { + struct nlattr *mx; + int len = 0; + + mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + + if (rt->rt_flags & RTF_MTU) + len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); + + if (rt->rt_flags & RTF_WINDOW) + len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); + + if (rt->rt_flags & RTF_IRTT) + len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); + + cfg->fc_mx = mx; + cfg->fc_mx_len = len; + } + + return 0; +} + /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void __user *arg) { + struct fib_config cfg; + struct rtentry rt; int err; - struct kern_rta rta; - struct rtentry r; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&r, arg, sizeof(struct rtentry))) + + if (copy_from_user(&rt, arg, sizeof(rt))) return -EFAULT; + rtnl_lock(); - err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); + err = rtentry_to_fib_config(cmd, &rt, &cfg); if (err == 0) { + struct fib_table *tb; + if (cmd == SIOCDELRT) { - struct fib_table *tb = fib_get_table(req.rtm.rtm_table); - err = -ESRCH; + tb = fib_get_table(cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_delete(tb, &cfg); + else + err = -ESRCH; } else { - struct fib_table *tb = fib_new_table(req.rtm.rtm_table); - err = -ENOBUFS; + tb = fib_new_table(cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_insert(tb, &cfg); + else + err = -ENOBUFS; } - kfree(rta.rta_mx); + + /* allocated by rtentry_to_fib_config() */ + kfree(cfg.fc_mx); } rtnl_unlock(); return err; @@ -285,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) +struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PROTOINFO] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, +}; + +static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib_config *cfg) { - int i; - - for (i=1; i<=RTA_MAX; i++, rta++) { - struct rtattr *attr = *rta; - if (attr) { - if (RTA_PAYLOAD(attr) < 4) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) - *rta = (struct rtattr*)RTA_DATA(attr); + struct nlattr *attr; + int err, remaining; + struct rtmsg *rtm; + + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + memset(cfg, 0, sizeof(*cfg)); + + rtm = nlmsg_data(nlh); + cfg->fc_family = rtm->rtm_family; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_tos = rtm->rtm_tos; + cfg->fc_table = rtm->rtm_table; + cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_scope = rtm->rtm_scope; + cfg->fc_type = rtm->rtm_type; + cfg->fc_flags = rtm->rtm_flags; + cfg->fc_nlflags = nlh->nlmsg_flags; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { + switch (attr->nla_type) { + case RTA_DST: + cfg->fc_dst = nla_get_be32(attr); + break; + case RTA_SRC: + cfg->fc_src = nla_get_be32(attr); + break; + case RTA_OIF: + cfg->fc_oif = nla_get_u32(attr); + break; + case RTA_GATEWAY: + cfg->fc_gw = nla_get_be32(attr); + break; + case RTA_PRIORITY: + cfg->fc_priority = nla_get_u32(attr); + break; + case RTA_PREFSRC: + cfg->fc_prefsrc = nla_get_be32(attr); + break; + case RTA_METRICS: + cfg->fc_mx = nla_data(attr); + cfg->fc_mx_len = nla_len(attr); + break; + case RTA_MULTIPATH: + cfg->fc_mp = nla_data(attr); + cfg->fc_mp_len = nla_len(attr); + break; + case RTA_FLOW: + cfg->fc_flow = nla_get_u32(attr); + break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; + case RTA_TABLE: + cfg->fc_table = nla_get_u32(attr); + break; } } + return 0; +errout: + return err; } int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_get_table(r->rtm_table); - if (tb) - return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ESRCH; + tb = fib_get_table(cfg.fc_table); + if (tb == NULL) { + err = -ESRCH; + goto errout; + } + + err = tb->tb_delete(tb, &cfg); +errout: + return err; } int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_new_table(r->rtm_table); - if (tb) - return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ENOBUFS; + tb = fib_new_table(cfg.fc_table); + if (tb == NULL) { + err = -ENOBUFS; + goto errout; + } + + err = tb->tb_insert(tb, &cfg); +errout: + return err; } int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + unsigned int h, s_h; + unsigned int e = 0, s_e; struct fib_table *tb; + struct hlist_node *node; + int dumped = 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && + ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) return ip_rt_dump(skb, cb); - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_TABLE_MIN; - - for (t=s_t; t<=RT_TABLE_MAX; t++) { - if (t < s_t) continue; - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - if ((tb = fib_get_table(t))==NULL) - continue; - if (tb->tb_dump(tb, skb, cb) < 0) - break; + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->tb_dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } } - - cb->args[0] = t; +out: + cb->args[1] = e; + cb->args[0] = h; return skb->len; } @@ -367,17 +627,18 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) only when netlink is already locked. */ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { - struct fib_table * tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct kern_rta rta; - - memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); + struct fib_table *tb; + struct fib_config cfg = { + .fc_protocol = RTPROT_KERNEL, + .fc_type = type, + .fc_dst = dst, + .fc_dst_len = dst_len, + .fc_prefsrc = ifa->ifa_local, + .fc_oif = ifa->ifa_dev->dev->ifindex, + .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + }; if (type == RTN_UNICAST) tb = fib_new_table(RT_TABLE_MAIN); @@ -387,26 +648,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr if (tb == NULL) return; - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; + cfg.fc_table = tb->tb_id; - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->tb_id; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + if (type != RTN_LOCAL) + cfg.fc_scope = RT_SCOPE_LINK; + else + cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_insert(tb, &cfg); else - tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -414,9 +666,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *prim = ifa; - u32 mask = ifa->ifa_mask; - u32 addr = ifa->ifa_local; - u32 prefix = ifa->ifa_address&mask; + __be32 mask = ifa->ifa_mask; + __be32 addr = ifa->ifa_local; + __be32 prefix = ifa->ifa_address&mask; if (ifa->ifa_flags&IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); @@ -432,7 +684,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && @@ -454,8 +706,8 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa; - u32 brd = ifa->ifa_address|~ifa->ifa_mask; - u32 any = ifa->ifa_address&ifa->ifa_mask; + __be32 brd = ifa->ifa_address|~ifa->ifa_mask; + __be32 any = ifa->ifa_address&ifa->ifa_mask; #define LOCAL_OK 1 #define BRD_OK 2 #define BRD0_OK 4 @@ -653,11 +905,17 @@ static struct notifier_block fib_netdev_notifier = { void __init ip_fib_init(void) { + unsigned int i; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&fib_table_hash[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); #else - fib_rules_init(); + fib4_rules_init(); #endif register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index e2890ec8159e..107bb6cbb0b3 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> @@ -52,7 +51,7 @@ static kmem_cache_t *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; struct list_head fn_alias; - u32 fn_key; + __be32 fn_key; }; struct fn_zone { @@ -65,7 +64,7 @@ struct fn_zone { #define FZ_HASHMASK(fz) ((fz)->fz_hashmask) int fz_order; /* Zone order */ - u32 fz_mask; + __be32 fz_mask; #define FZ_MASK(fz) ((fz)->fz_mask) }; @@ -78,7 +77,7 @@ struct fn_hash { struct fn_zone *fn_zone_list; }; -static inline u32 fn_hash(u32 key, struct fn_zone *fz) +static inline u32 fn_hash(__be32 key, struct fn_zone *fz) { u32 h = ntohl(key)>>(32 - fz->fz_order); h ^= (h>>20); @@ -88,7 +87,7 @@ static inline u32 fn_hash(u32 key, struct fn_zone *fz) return h; } -static inline u32 fz_key(u32 dst, struct fn_zone *fz) +static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) { return dst & FZ_MASK(fz); } @@ -205,11 +204,10 @@ static struct fn_zone * fn_new_zone(struct fn_hash *table, int z) { int i; - struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL); + struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); if (!fz) return NULL; - memset(fz, 0, sizeof(struct fn_zone)); if (z) { fz->fz_divisor = 16; } else { @@ -256,7 +254,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result struct hlist_head *head; struct hlist_node *node; struct fib_node *f; - u32 k = fz_key(flp->fl4_dst, fz); + __be32 k = fz_key(flp->fl4_dst, fz); head = &fz->fz_hash[fn_hash(k, fz)]; hlist_for_each_entry(f, node, head, fn_hash) { @@ -367,7 +365,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) } /* Return the node in FZ matching KEY. */ -static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) +static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) { struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)]; struct hlist_node *node; @@ -381,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -static int -fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f, *f; struct fib_alias *fa, *new_fa; struct fn_zone *fz; struct fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; - u32 key; + u8 tos = cfg->fc_tos; + __be32 key; int err; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - fz = table->fn_zones[z]; - if (!fz && !(fz = fn_new_zone(table, z))) + + fz = table->fn_zones[cfg->fc_dst_len]; + if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) return -ENOBUFS; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } - if ((fi = fib_create_info(r, rta, n, &err)) == NULL) - return err; + fi = fib_create_info(cfg); + if (IS_ERR(fi)) + return PTR_ERR(fi); if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && - (z==32 || (1<<z) > fz->fz_divisor)) + (cfg->fc_dst_len == 32 || + (1 << cfg->fc_dst_len) > fz->fz_divisor)) fn_rehash_zone(fz); f = fib_find_node(fz, key); @@ -442,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (n->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + fa->fa_type = cfg->fc_type; + fa->fa_scope = cfg->fc_scope; state = fa->fa_state; fa->fa_state &= ~FA_S_ACCESSED; fib_hash_genid++; @@ -476,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) goto out; } - if (!(n->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(n->nlmsg_flags&NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -508,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* @@ -528,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, + &cfg->fc_nlinfo); return 0; out_free_new_fa: @@ -539,30 +535,25 @@ out: } -static int -fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; - int z = r->rtm_dst_len; struct fn_zone *fz; - u32 key; - u8 tos = r->rtm_tos; + __be32 key; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - if ((fz = table->fn_zones[z]) == NULL) + + if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) return -ESRCH; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } f = fib_find_node(fz, key); @@ -570,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!f) fa = NULL; else - fa = fib_find_alias(&f->fn_alias, tos, 0); + fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); if (!fa) return -ESRCH; @@ -579,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if (fa->fa_tos != cfg->fc_tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, n, rta, fi) == 0) { + if ((!cfg->fc_type || + fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -598,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, + tb->tb_id, &cfg->fc_nlinfo); kill_fn = 0; write_lock_bh(&fib_hash_lock); @@ -686,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, struct fib_node *f; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; hlist_for_each_entry(f, node, head, fn_hash) { struct fib_alias *fa; @@ -701,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, tb->tb_id, fa->fa_type, fa->fa_scope, - &f->fn_key, + f->fn_key, fz->fz_order, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } next: i++; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -724,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h=0; h < fz->fz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); if (fz->fz_hash == NULL || hlist_empty(&fz->fz_hash[h])) continue; if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -748,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin struct fn_zone *fz; struct fn_hash *table = (struct fn_hash*)tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&fib_hash_lock); for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&fib_hash_lock); return -1; } } read_unlock(&fib_hash_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; @@ -974,7 +966,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v) read_unlock(&fib_hash_lock); } -static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi) +static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) { static const unsigned type2flags[RTN_MAX + 1] = { [7] = RTF_REJECT, [8] = RTF_REJECT, @@ -983,7 +975,7 @@ static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi) if (fi && fi->fib_nh->nh_gw) flags |= RTF_GATEWAY; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; return flags; @@ -999,7 +991,7 @@ static int fib_seq_show(struct seq_file *seq, void *v) { struct fib_iter_state *iter; char bf[128]; - u32 prefix, mask; + __be32 prefix, mask; unsigned flags; struct fib_node *f; struct fib_alias *fa; @@ -1047,7 +1039,7 @@ static int fib_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct fib_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -1058,7 +1050,6 @@ static int fib_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ef6609ea0eb7..0e8b70bad4e1 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -20,22 +20,17 @@ struct fib_alias { /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __u32 zone, __u32 mask, + struct fib_result *res, __be32 zone, __be32 mask, int prefixlen); extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(const struct rtmsg *r, - struct kern_rta *rta, - const struct nlmsghdr *, - int *err); -extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, - struct kern_rta *rta, struct fib_info *fi); +extern struct fib_info *fib_create_info(struct fib_config *cfg); +extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); -extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); +extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, + int dst_len, u32 tb_id, struct nl_info *info); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ec566f3e66c7..0852b9cd065a 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -5,9 +5,8 @@ * * IPv4 Forwarding Information Base: policy rules. * - * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * Thomas Graf <tgraf@suug.ch> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,465 +18,350 @@ * Marc Boucher : routing by fwmark */ -#include <linux/config.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/errno.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/inetdevice.h> #include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/inetdevice.h> #include <linux/init.h> #include <linux/list.h> #include <linux/rcupdate.h> - #include <net/ip.h> -#include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> -#include <net/sock.h> #include <net/ip_fib.h> +#include <net/fib_rules.h> -#define FRprintk(a...) +static struct fib_rules_ops fib4_rules_ops; -struct fib_rule +struct fib4_rule { - struct hlist_node hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - u32 r_src; - u32 r_srcmask; - u32 r_dst; - u32 r_dstmask; - u32 r_srcmap; - u8 r_flags; - u8 r_tos; + struct fib_rule common; + u8 dst_len; + u8 src_len; + u8 tos; + __be32 src; + __be32 srcmask; + __be32 dst; + __be32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; + u32 fwmask; #endif - int r_ifindex; #ifdef CONFIG_NET_CLS_ROUTE - __u32 r_tclassid; + u32 tclassid; #endif - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; -static struct fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFF, - .r_table = RT_TABLE_DEFAULT, - .r_action = RTN_UNICAST, +static struct fib4_rule default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule main_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFE, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST, +static struct fib4_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule local_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_table = RT_TABLE_LOCAL, - .r_action = RTN_UNICAST, +static struct fib4_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, }; -static struct hlist_head fib_rules; +static LIST_HEAD(fib4_rules); -/* writer func called from netlink -- rtnl_sem hold*/ - -static void rtmsg_rule(int, struct fib_rule *); - -int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && - rtm->rtm_tos == r->r_tos && -#ifdef CONFIG_IP_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - err = -EPERM; - if (r == &local_rule) - break; - - hlist_del_rcu(&r->hlist); - r->r_dead = 1; - rtmsg_rule(RTM_DELRULE, r); - fib_rule_put(r); - err = 0; - break; - } - } - return err; + return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; } +#endif -/* Allocate new unique table id */ - -static struct fib_table *fib_empty_table(void) +int fib_lookup(struct flowi *flp, struct fib_result *res) { - int id; + struct fib_lookup_arg arg = { + .result = res, + }; + int err; - for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_tables[id] == NULL) - return __fib_new_table(id); - return NULL; -} + err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + res->r = arg.rule; -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *r = container_of(head, struct fib_rule, rcu); - kfree(r); + return err; } -void fib_rule_put(struct fib_rule *r) +static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, fib_rule_put_rcu); - else - printk("Freeing alive rule %p\n", r); + int err = -EAGAIN; + struct fib_table *tbl; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + if ((tbl = fib_get_table(rule->table)) == NULL) + goto errout; + + err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; } -/* writer func called from netlink -- rtnl_sem hold*/ -int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +void fib_select_default(const struct flowi *flp, struct fib_result *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || - (rtm->rtm_tos & ~IPTOS_TOS_MASK)) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct fib_table *table; - if (rtm->rtm_type == RTN_UNICAST) { - if ((table = fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = table->tb_id; - } + if (res->r && res->r->action == FR_ACT_TO_TBL && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->table)) != NULL) + tb->tb_select_default(tb, flp, res); } +} - new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - memset(new_r, 0, sizeof(*new_r)); - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); - new_r->r_tos = rtm->rtm_tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); -#endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = __dev_get_by_name(new_r->r_ifname); - if (dev) - new_r->r_ifindex = dev->ifindex; - } -#ifdef CONFIG_NET_CLS_ROUTE - if (rta[RTA_FLOW-1]) - memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); -#endif - r = container_of(fib_rules.first, struct fib_rule, hlist); +static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib4_rule *r = (struct fib4_rule *) rule; + __be32 daddr = fl->fl4_dst; + __be32 saddr = fl->fl4_src; - if (!new_r->r_preference) { - if (r && r->hlist.next != NULL) { - r = container_of(r->hlist.next, struct fib_rule, hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; - } - } + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); + if (r->tos && (r->tos != fl->fl4_tos)) + return 0; - if (last) - hlist_add_after_rcu(&last->hlist, &new_r->hlist); - else - hlist_add_before_rcu(&new_r->hlist, &r->hlist); +#ifdef CONFIG_IP_ROUTE_FWMARK + if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) + return 0; +#endif - rtmsg_rule(RTM_NEWRULE, new_r); - return 0; + return 1; } -#ifdef CONFIG_NET_CLS_ROUTE -u32 fib_rules_tclass(struct fib_result *res) +static struct fib_table *fib_empty_table(void) { - if (res->r) - return res->r->r_tclassid; - return 0; + u32 id; + + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_get_table(id) == NULL) + return fib_new_table(id); + return NULL; } -#endif -/* callers should hold rtnl semaphore */ +static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U32 }, + [FRA_DST] = { .type = NLA_U32 }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, + [FRA_FLOW] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, +}; -static void fib_rules_detach(struct net_device *dev) +static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct hlist_node *node; - struct fib_rule *r; + int err = -EINVAL; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + if (frh->src_len > 32 || frh->dst_len > 32 || + (frh->tos & ~IPTOS_TOS_MASK)) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct fib_table *table; - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; + table = fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + rule->table = table->tb_id; + } } -} -/* callers should hold rtnl semaphore */ + if (tb[FRA_SRC]) + rule4->src = nla_get_be32(tb[FRA_SRC]); -static void fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; + if (tb[FRA_DST]) + rule4->dst = nla_get_be32(tb[FRA_DST]); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; +#ifdef CONFIG_IP_ROUTE_FWMARK + if (tb[FRA_FWMARK]) { + rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule4->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule4->fwmask = 0xFFFFFFFF; } + + if (tb[FRA_FWMASK]) + rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); +#endif + +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW]) + rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); +#endif + + rule4->src_len = frh->src_len; + rule4->srcmask = inet_make_mask(rule4->src_len); + rule4->dst_len = frh->dst_len; + rule4->dstmask = inet_make_mask(rule4->dst_len); + rule4->tos = frh->tos; + + err = 0; +errout: + return err; } -int fib_lookup(const struct flowi *flp, struct fib_result *res) +static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - int err; - struct fib_rule *r, *policy; - struct fib_table *tb; - struct hlist_node *node; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; - u32 daddr = flp->fl4_dst; - u32 saddr = flp->fl4_src; + if (frh->src_len && (rule4->src_len != frh->src_len)) + return 0; -FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", - NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); + if (frh->dst_len && (rule4->dst_len != frh->dst_len)) + return 0; - rcu_read_lock(); + if (frh->tos && (rule4->tos != frh->tos)) + return 0; - hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || - (r->r_tos && r->r_tos != flp->fl4_tos) || #ifdef CONFIG_IP_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || + if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; + + if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - -FRprintk("tb %d r %d ", r->r_table, r->r_action); - switch (r->r_action) { - case RTN_UNICAST: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = fib_get_table(r->r_table)) == NULL) - continue; - err = tb->tb_lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } -FRprintk("FAILURE\n"); - rcu_read_unlock(); - return -ENETUNREACH; -} +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) + return 0; +#endif -void fib_select_default(const struct flowi *flp, struct fib_result *res) -{ - if (res->r && res->r->r_action == RTN_UNICAST && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(res->r->r_table)) != NULL) - tb->tb_select_default(tb, flp, res); - } -} + if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC]))) + return 0; -static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; + if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST]))) + return 0; - if (event == NETDEV_UNREGISTER) - fib_rules_detach(dev); - else if (event == NETDEV_REGISTER) - fib_rules_attach(dev); - return NOTIFY_DONE; + return 1; } +static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib4_rule *rule4 = (struct fib4_rule *) rule; -static struct notifier_block fib_rules_notifier = { - .notifier_call =fib_rules_event, -}; + frh->family = AF_INET; + frh->dst_len = rule4->dst_len; + frh->src_len = rule4->src_len; + frh->tos = rule4->tos; -static __inline__ int inet_fill_rule(struct sk_buff *skb, - struct fib_rule *r, - u32 pid, u32 seq, int event, - unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_INET; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = r->r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); + if (rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); + + if (rule4->fwmask || rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); #endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 4, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 4, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); + + if (rule4->dst_len) + NLA_PUT_BE32(skb, FRA_DST, rule4->dst); + + if (rule4->src_len) + NLA_PUT_BE32(skb, FRA_SRC, rule4->src); + #ifdef CONFIG_NET_CLS_ROUTE - if (r->r_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); + if (rule4->tclassid) + NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); #endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; + return 0; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return -ENOBUFS; } -/* callers should hold rtnl semaphore */ - -static void rtmsg_rule(int event, struct fib_rule *r) +int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); - - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); - else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { - kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); - } + return fib_rules_dump(skb, cb, AF_INET); } -int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +static u32 fib4_rule_default_pref(void) { - int idx = 0; - int s_idx = cb->args[0]; - struct fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - - if (idx < s_idx) - continue; - if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWRULE, NLM_F_MULTI) < 0) - break; - idx++; + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&fib4_rules)) { + pos = fib4_rules.next; + if (pos->next != &fib4_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } - rcu_read_unlock(); - cb->args[0] = idx; - return skb->len; + return 0; } -void __init fib_rules_init(void) +static struct fib_rules_ops fib4_rules_ops = { + .family = AF_INET, + .rule_size = sizeof(struct fib4_rule), + .action = fib4_rule_action, + .match = fib4_rule_match, + .configure = fib4_rule_configure, + .compare = fib4_rule_compare, + .fill = fib4_rule_fill, + .default_pref = fib4_rule_default_pref, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, + .rules_list = &fib4_rules, + .owner = THIS_MODULE, +}; + +void __init fib4_rules_init(void) { - INIT_HLIST_HEAD(&fib_rules); - hlist_add_head(&local_rule.hlist, &fib_rules); - hlist_add_after(&local_rule.hlist, &main_rule.hlist); - hlist_add_after(&main_rule.hlist, &default_rule.hlist); - register_netdevice_notifier(&fib_rules_notifier); + list_add_tail(&local_rule.common.list, &fib4_rules); + list_add_tail(&main_rule.common.list, &fib4_rules); + list_add_tail(&default_rule.common.list, &fib4_rules); + + fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0f4145babb14..884d176e0082 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> @@ -34,7 +33,6 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/init.h> #include <net/arp.h> @@ -45,12 +43,14 @@ #include <net/sock.h> #include <net/ip_fib.h> #include <net/ip_mp_alg.h> +#include <net/netlink.h> +#include <net/nexthop.h> #include "fib_lookup.h" #define FSprintk(a...) -static DEFINE_RWLOCK(fib_info_lock); +static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_hash_size; @@ -160,7 +160,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - write_lock(&fib_info_lock); + spin_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -173,7 +173,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - write_unlock(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -203,7 +203,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; - val ^= fi->fib_prefsrc; + val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; return (val ^ (val >> 7) ^ (val >> 12)) & mask; @@ -248,14 +248,14 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) Used only by redirect accept routine. */ -int ip_fib_check_default(u32 gw, struct net_device *dev) +int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct hlist_node *node; struct fib_nh *nh; unsigned int hash; - read_lock(&fib_info_lock); + spin_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; @@ -263,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags&RTNH_F_DEAD)) { - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return 0; } } - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return -1; } -void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req) +void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, + int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : n->nlmsg_pid; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; - - if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, z, - fa->fa_tos, - fa->fa_info, 0) < 0) { + int payload = sizeof(struct rtmsg) + 256; + u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = fib_dump_info(skb, info->pid, seq, event, tb_id, + fa->fa_type, fa->fa_scope, key, dst_len, + fa->fa_tos, fa->fa_info, 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; - if (n->nlmsg_flags&NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); - if (n->nlmsg_flags&NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + + err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with @@ -343,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) -{ - while (RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(u32*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} - -static int -fib_count_nexthops(struct rtattr *rta) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) { int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - while (nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(rtnh, remaining)) { nhs++; - nhp = RTNH_NEXT(nhp); - }; - return nhs; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 0 : nhs; } -static int -fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { - nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + + nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nh->nh_oif = rtnh->rtnh_ifindex; + nh->nh_weight = rtnh->rtnh_hops + 1; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + nh->nh_gw = nla ? nla_get_be32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE - nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + nla = nla_find(attrs, attrlen, RTA_FLOW); + nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); + return 0; } #endif -int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, - struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - struct rtnexthop *nhp; - int nhlen; + struct rtnexthop *rtnh; + int remaining; #endif - if (rta->rta_priority && - *rta->rta_priority != fi->fib_priority) + if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + if (cfg->fc_oif || cfg->fc_gw) { + if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && + (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; return 1; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp == NULL) + if (cfg->fc_mp == NULL) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + + rtnh = cfg->fc_mp; + remaining = cfg->fc_mp_len; for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - u32 gw; + int attrlen; - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) return 1; - if (attrlen) { - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); - if (gw && gw != nh->nh_gw) + + attrlen = rtnh_attrlen(rtnh); + if (attrlen < 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla && nla_get_be32(nla) != nh->nh_gw) return 1; #ifdef CONFIG_NET_CLS_ROUTE - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); - if (gw && gw != nh->nh_tclassid) + nla = nla_find(attrs, attrlen, RTA_FLOW); + if (nla && nla_get_u32(nla) != nh->nh_tclassid) return 1; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; @@ -489,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, |-> {local prefix} (terminal node) */ -static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, + struct fib_nh *nh) { int err; @@ -503,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; - if (r->rtm_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; @@ -517,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n return 0; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = nh->nh_gw, - .scope = r->rtm_scope + 1 } }, - .oif = nh->nh_oif }; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = nh->nh_gw, + .scope = cfg->fc_scope + 1, + }, + }, + .oif = nh->nh_oif, + }; /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) @@ -564,11 +568,11 @@ out: return 0; } -static inline unsigned int fib_laddr_hashfn(u32 val) +static inline unsigned int fib_laddr_hashfn(__be32 val) { unsigned int mask = (fib_hash_size - 1); - return (val ^ (val >> 7) ^ (val >> 14)) & mask; + return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } static struct hlist_head *fib_hash_alloc(int bytes) @@ -599,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - write_lock(&fib_info_lock); + spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -640,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - write_unlock(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); fib_hash_free(old_laddrhash, bytes); } -struct fib_info * -fib_create_info(const struct rtmsg *r, struct kern_rta *rta, - const struct nlmsghdr *nlh, int *errp) +struct fib_info *fib_create_info(struct fib_config *cfg) { int err; struct fib_info *fi = NULL; struct fib_info *ofi; -#ifdef CONFIG_IP_ROUTE_MULTIPATH int nhs = 1; -#else - const int nhs = 1; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 mp_alg = IP_MP_ALG_NONE; -#endif /* Fast check to catch the most weird cases */ - if (fib_props[r->rtm_type].scope > r->rtm_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp) { - nhs = fib_count_nexthops(rta->rta_mp); + if (cfg->fc_mp) { + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); if (nhs == 0) goto err_inval; } #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rta->rta_mp_alg) { - mp_alg = *rta->rta_mp_alg; - - if (mp_alg < IP_MP_ALG_NONE || - mp_alg > IP_MP_ALG_MAX) + if (cfg->fc_mp_alg) { + if (cfg->fc_mp_alg < IP_MP_ALG_NONE || + cfg->fc_mp_alg > IP_MP_ALG_MAX) goto err_inval; } #endif @@ -710,49 +703,47 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; } - fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; fib_info_cnt++; - memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); - fi->fib_protocol = r->rtm_protocol; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_flags = cfg->fc_flags; + fi->fib_priority = cfg->fc_priority; + fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_nhs = nhs; change_nexthops(fi) { nh->nh_parent = fi; } endfor_nexthops(fi) - fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) goto err_inval; - fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); + fi->fib_metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); - if (rta->rta_mp) { + if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + if (err != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) goto err_inval; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) goto err_inval; #endif #else @@ -760,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, #endif } else { struct fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 4); + + nh->nh_oif = cfg->fc_oif; + nh->nh_gw = cfg->fc_gw; + nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow) - memcpy(&nh->nh_tclassid, rta->rta_flow, 4); + nh->nh_tclassid = cfg->fc_flow; #endif - nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = mp_alg; + fi->fib_mp_alg = cfg->fc_mp_alg; #endif - if (fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (fib_props[cfg->fc_type].error) { + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; } - if (r->rtm_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) goto err_inval; - if (r->rtm_scope == RT_SCOPE_HOST) { + if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ @@ -800,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(r, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nh)) != 0) goto failure; } endfor_nexthops(fi) } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fi->fib_prefsrc != cfg->fc_dst) if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -822,7 +811,7 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock(&fib_info_lock); + spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -841,24 +830,24 @@ link_it: head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - write_unlock(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); return fi; err_inval: err = -EINVAL; failure: - *errp = err; if (fi) { fi->fib_dead = 1; free_fib_info(fi); } - return NULL; + + return ERR_PTR(err); } /* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __u32 zone, __u32 mask, + struct fib_result *res, __be32 zone, __be32 mask, int prefixlen) { struct fib_alias *fa; @@ -925,8 +914,7 @@ out_fill_res: res->fi = fa->fa_info; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED res->netmask = mask; - res->network = zone & - (0xFFFFFFFF >> (32 - prefixlen)); + res->network = zone & inet_make_mask(prefixlen); #endif atomic_inc(&res->fi->fib_clntref); return 0; @@ -934,225 +922,94 @@ out_fill_res: /* Find appropriate source address to this destination */ -u32 __fib_res_prefsrc(struct fib_result *res) +__be32 __fib_res_prefsrc(struct fib_result *res) { return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); } -int -fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, - struct fib_info *fi, unsigned int flags) +int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, + struct fib_info *fi, unsigned int flags) { + struct nlmsghdr *nlh; struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -ENOBUFS; + + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; + NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; - if (rtm->rtm_dst_len) - RTA_PUT(skb, RTA_DST, 4, dst); rtm->rtm_protocol = fi->fib_protocol; + + if (rtm->rtm_dst_len) + NLA_PUT_BE32(skb, RTA_DST, dst); + if (fi->fib_priority) - RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); -#ifdef CONFIG_NET_CLS_ROUTE - if (fi->fib_nh[0].nh_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); -#endif + NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); + if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + if (fi->fib_prefsrc) - RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); + if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) - RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); +#ifdef CONFIG_NET_CLS_ROUTE + if (fi->fib_nh[0].nh_tclassid) + NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); +#endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { - struct rtnexthop *nhp; - struct rtattr *mp_head; - if (skb_tailroom(skb) <= RTA_SPACE(0)) - goto rtattr_failure; - mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); - - for_nexthops(fi) { - if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) - goto rtattr_failure; - nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); - nhp->rtnh_flags = nh->nh_flags & 0xFF; - nhp->rtnh_hops = nh->nh_weight-1; - nhp->rtnh_ifindex = nh->nh_oif; - if (nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); - nhp->rtnh_len = skb->tail - (unsigned char*)nhp; - } endfor_nexthops(fi); - mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb->tail - (u8*)mp_head; - } -#endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} + struct rtnexthop *rtnh; + struct nlattr *mp; -#ifndef CONFIG_IP_NOSIOCRT + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (mp == NULL) + goto nla_put_failure; -int -fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r) -{ - int plen; - u32 *ptr; - - memset(rtm, 0, sizeof(*rtm)); - memset(rta, 0, sizeof(*rta)); - - if (r->rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - - /* Check mask for validity: - a) it must be contiguous. - b) destination must have all host bits clear. - c) if application forgot to set correct family (AF_INET), - reject request unless it is absolutely clear i.e. - both family and mask are zero. - */ - plen = 32; - ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; - if (!(r->rt_flags&RTF_HOST)) { - u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; - if (r->rt_genmask.sa_family != AF_INET) { - if (mask || r->rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - if (bad_mask(mask, *ptr)) - return -EINVAL; - plen = inet_mask_len(mask); - } - - nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = 0; - nl->nlmsg_seq = 0; - nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); - if (cmd == SIOCDELRT) { - nl->nlmsg_type = RTM_DELROUTE; - nl->nlmsg_flags = 0; - } else { - nl->nlmsg_type = RTM_NEWROUTE; - nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; - rtm->rtm_protocol = RTPROT_BOOT; - } + for_nexthops(fi) { + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (rtnh == NULL) + goto nla_put_failure; - rtm->rtm_dst_len = plen; - rta->rta_dst = ptr; + rtnh->rtnh_flags = nh->nh_flags & 0xFF; + rtnh->rtnh_hops = nh->nh_weight - 1; + rtnh->rtnh_ifindex = nh->nh_oif; - if (r->rt_metric) { - *(u32*)&r->rt_pad3 = r->rt_metric - 1; - rta->rta_priority = (u32*)&r->rt_pad3; - } - if (r->rt_flags&RTF_REJECT) { - rtm->rtm_scope = RT_SCOPE_HOST; - rtm->rtm_type = RTN_UNREACHABLE; - return 0; - } - rtm->rtm_scope = RT_SCOPE_NOWHERE; - rtm->rtm_type = RTN_UNICAST; - - if (r->rt_dev) { - char *colon; - struct net_device *dev; - char devname[IFNAMSIZ]; - - if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) - return -EFAULT; - devname[IFNAMSIZ-1] = 0; - colon = strchr(devname, ':'); - if (colon) - *colon = 0; - dev = __dev_get_by_name(devname); - if (!dev) - return -ENODEV; - rta->rta_oif = &dev->ifindex; - if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - return -ENODEV; - *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) - if (strcmp(ifa->ifa_label, devname) == 0) - break; - if (ifa == NULL) - return -ENODEV; - rta->rta_prefsrc = &ifa->ifa_local; - } - } + if (nh->nh_gw) + NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); +#ifdef CONFIG_NET_CLS_ROUTE + if (nh->nh_tclassid) + NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); +#endif + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; + } endfor_nexthops(fi); - ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; - if (r->rt_gateway.sa_family == AF_INET && *ptr) { - rta->rta_gw = ptr; - if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) - rtm->rtm_scope = RT_SCOPE_UNIVERSE; + nla_nest_end(skb, mp); } +#endif + return nlmsg_end(skb, nlh); - if (cmd == SIOCDELRT) - return 0; - - if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) - return -EINVAL; - - if (rtm->rtm_scope == RT_SCOPE_NOWHERE) - rtm->rtm_scope = RT_SCOPE_LINK; - - if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { - struct rtattr *rec; - struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); - if (mx == NULL) - return -ENOMEM; - rta->rta_mx = mx; - mx->rta_type = RTA_METRICS; - mx->rta_len = RTA_LENGTH(0); - if (r->rt_flags&RTF_MTU) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_ADVMSS; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; - } - if (r->rt_flags&RTF_WINDOW) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_WINDOW; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_window; - } - if (r->rt_flags&RTF_IRTT) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_RTT; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; - } - } - return 0; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -#endif - /* Update FIB if: - local address disappeared -> we must delete all the entries @@ -1160,7 +1017,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down(u32 local, struct net_device *dev, int force) +int fib_sync_down(__be32 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 95a639f2e3db..d17990ec724f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -52,7 +52,6 @@ #define VERSION "0.407" -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -1125,17 +1124,14 @@ err: return fa_head; } -static int -fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; struct list_head *fa_head = NULL; struct fib_info *fi; - int plen = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; u32 key, mask; int err; struct leaf *l; @@ -1143,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); - pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1158,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = key & mask; - fi = fib_create_info(r, rta, nlhdr, &err); - - if (!fi) + fi = fib_create_info(cfg); + if (IS_ERR(fi)) { + err = PTR_ERR(fi); goto err; + } l = fib_find_node(t, key); fa = NULL; @@ -1186,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (nlhdr->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; @@ -1201,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fi_drop = fa->fa_info; new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state &= ~FA_S_ACCESSED; @@ -1225,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { goto out; } } - if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1245,16 +1238,16 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. */ if (!fa_head) { - fa_head = fib_insert_node(t, &err, key, plen); err = 0; + fa_head = fib_insert_node(t, &err, key, plen); if (err) goto out_free_new_fa; } @@ -1263,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + &cfg->fc_nlinfo); succeeded: return 0; @@ -1282,18 +1276,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, struct fib_result *res) { int err, i; - t_key mask; + __be32 mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; struct hlist_node *node; hlist_for_each_entry_rcu(li, node, hhead, hlist) { i = li->plen; - mask = ntohl(inet_make_mask(i)); - if (l->key != (key & mask)) + mask = inet_make_mask(i); + if (l->key != (key & ntohl(mask))) continue; - if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { + if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { *plen = i; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; @@ -1549,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key) return 1; } -static int -fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; - int plen = r->rtm_dst_len; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; - if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) @@ -1599,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (fa->fa_tos != tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, nlhdr, rta, fi) == 0) { + if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -1615,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, return -ESRCH; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, + &cfg->fc_nlinfo); l = fib_find_node(t, key); li = find_leaf_info(l, plen); @@ -1847,9 +1834,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi int i, s_i; struct fib_alias *fa; - u32 xkey = htonl(key); + __be32 xkey = htonl(key); - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1867,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi tb->tb_id, fa->fa_type, fa->fa_scope, - &xkey, + xkey, plen, fa->fa_tos, fa->fa_info, 0) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1887,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str struct list_head *fa_head; struct leaf *l = NULL; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); @@ -1905,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -1918,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin int m, s_m; struct trie *t = (struct trie *) tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { - cb->args[1] = m; + cb->args[2] = m; goto out; } } rcu_read_unlock(); - cb->args[1] = m; + cb->args[2] = m; return skb->len; out: rcu_read_unlock(); @@ -1944,9 +1931,9 @@ out: /* Fix more generic FIB names for init later */ #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; @@ -2294,7 +2281,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); + __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); if (!NODE_PARENT(n)) { if (iter->trie == trie_local) @@ -2310,7 +2297,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; int i; - u32 val = ntohl(l->key); + __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); @@ -2373,7 +2360,7 @@ static struct file_operations fib_trie_fops = { .release = seq_release_private, }; -static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) +static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) { static unsigned type2flags[RTN_MAX + 1] = { [7] = RTF_REJECT, [8] = RTF_REJECT, @@ -2382,7 +2369,7 @@ static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) if (fi && fi->fib_nh->nh_gw) flags |= RTF_GATEWAY; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; return flags; @@ -2416,7 +2403,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) for (i=32; i>=0; i--) { struct leaf_info *li = find_leaf_info(l, i); struct fib_alias *fa; - u32 mask, prefix; + __be32 mask, prefix; if (!li) continue; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 017900172f7d..b39a37a47545 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -64,7 +64,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/jiffies.h> @@ -105,7 +104,7 @@ struct icmp_bxm { struct { struct icmphdr icmph; - __u32 times[3]; + __be32 times[3]; } data; int head_len; struct ip_options replyopts; @@ -188,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { }; /* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts = 1; +int sysctl_icmp_echo_ignore_all __read_mostly; +int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses = 1; +int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; /* * Configurable global rate limit. @@ -206,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1; * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit = 1 * HZ; -int sysctl_icmp_ratemask = 0x1818; -int sysctl_icmp_errors_use_inbound_ifaddr; +int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; +int sysctl_icmp_ratemask __read_mostly = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; /* * ICMP control array. This specifies what to do with each ICMP. @@ -382,7 +381,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; - u32 daddr; + __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; @@ -407,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -430,14 +430,14 @@ out_unlock: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) +void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; struct ipcm_cookie ipc; - u32 saddr; + __be32 saddr; u8 tos; if (!rt) @@ -561,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -894,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb) if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { - u32 _mask, *mp; + __be32 _mask, *mp; mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); BUG_ON(mp == NULL); @@ -929,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ab680c851aa2..6eee71647b7c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -72,7 +72,6 @@ * Vinay Kulkarni */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -139,14 +138,14 @@ time_before(jiffies, (in_dev)->mr_v2_seen))) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta); +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { @@ -427,7 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, first = 1; psf_prev = NULL; for (psf=*psf_list; psf; psf=psf_next) { - u32 *psrc; + __be32 *psrc; psf_next = psf->sf_next; @@ -440,7 +439,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (isquery) psf->sf_gsresp = 0; - if (AVAILABLE(skb) < sizeof(u32) + + if (AVAILABLE(skb) < sizeof(__be32) + first*sizeof(struct igmpv3_grec)) { if (truncate && !first) break; /* truncate these */ @@ -456,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } - psrc = (u32 *)skb_put(skb, sizeof(u32)); + psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -631,8 +630,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; - u32 group = pmc ? pmc->multiaddr : 0; - u32 dst; + __be32 group = pmc ? pmc->multiaddr : 0; + __be32 dst; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -749,7 +748,7 @@ static void igmp_timer_expire(unsigned long data) } /* mark EXCLUDE-mode sources */ -static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -776,7 +775,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -804,7 +803,7 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, u32 group) +static void igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; @@ -829,7 +828,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, struct igmphdr *ih = skb->h.igmph; struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; struct ip_mc_list *im; - u32 group = ih->group; + __be32 group = ih->group; int max_delay; int mark = 0; @@ -863,7 +862,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, ih3 = (struct igmpv3_query *) skb->h.raw; if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) - + ntohs(ih3->nsrcs)*sizeof(__u32))) + + ntohs(ih3->nsrcs)*sizeof(__be32))) return; ih3 = (struct igmpv3_query *) skb->h.raw; } @@ -932,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) goto drop; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ @@ -986,7 +985,7 @@ drop: * Add a filter to a device */ -static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1006,7 +1005,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) * Remove a filter from a device */ -static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1029,10 +1028,9 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kmalloc(sizeof(*pmc), GFP_KERNEL); + pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; - memset(pmc, 0, sizeof(*pmc)); spin_lock_bh(&im->lock); pmc->interface = im->interface; in_dev_hold(in_dev); @@ -1057,7 +1055,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) { struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf, *psf_next; @@ -1195,7 +1193,7 @@ static void igmp_group_added(struct ip_mc_list *im) * A socket has joined a multicast group on device dev. */ -void ip_mc_inc_group(struct in_device *in_dev, u32 addr) +void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; @@ -1254,7 +1252,7 @@ out: * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, u32 addr) +void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *i, **ip; @@ -1399,12 +1397,12 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf = IP_MAX_MSF; +int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; +int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; int rv = 0; @@ -1452,8 +1450,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #define igmp_ifc_event(x) do { } while (0) #endif -static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; @@ -1519,7 +1517,7 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc, int delta) + __be32 *psfsrc, int delta) { struct ip_sf_list *psf, *psf_prev; @@ -1530,10 +1528,9 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, psf_prev = psf; } if (!psf) { - psf = kmalloc(sizeof(*psf), GFP_ATOMIC); + psf = kzalloc(sizeof(*psf), GFP_ATOMIC); if (!psf) return -ENOBUFS; - memset(psf, 0, sizeof(*psf)); psf->sf_inaddr = *psfsrc; if (psf_prev) { psf_prev->sf_next = psf; @@ -1626,8 +1623,8 @@ static int sf_setstate(struct ip_mc_list *pmc) /* * Add multicast source filter list to the interface list */ -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; @@ -1720,7 +1717,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; - u32 addr = imr->imr_multiaddr.s_addr; + __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -1794,31 +1791,37 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; - u32 group = imr->imr_multiaddr.s_addr; + __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; + int ret = -EADDRNOTAVAIL; rtnl_lock(); in_dev = ip_mc_find_dev(imr); - if (!in_dev) { - rtnl_unlock(); - return -ENODEV; - } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { - if (iml->multi.imr_multiaddr.s_addr == group && - iml->multi.imr_ifindex == ifindex) { - (void) ip_mc_leave_src(sk, iml, in_dev); + if (iml->multi.imr_multiaddr.s_addr != group) + continue; + if (ifindex) { + if (iml->multi.imr_ifindex != ifindex) + continue; + } else if (imr->imr_address.s_addr && imr->imr_address.s_addr != + iml->multi.imr_address.s_addr) + continue; + + (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + *imlp = iml->next; + if (in_dev) ip_mc_dec_group(in_dev, group); - rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); - return 0; - } + rtnl_unlock(); + sock_kfree_s(sk, iml, sizeof(*iml)); + return 0; } + if (!in_dev) + ret = -ENODEV; rtnl_unlock(); - return -EADDRNOTAVAIL; + return ret; } int ip_mc_source(int add, int omode, struct sock *sk, struct @@ -1826,7 +1829,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct { int err; struct ip_mreqn imr; - u32 addr = mreqs->imr_multiaddr; + __be32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); @@ -1880,7 +1883,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = !0; for (i=0; i<psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1932,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1957,7 +1960,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) { int err = 0; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2041,7 +2044,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, { int err, len, count, copycount; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2100,7 +2103,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, { int err, i, count, copycount; struct sockaddr_in *psin; - u32 addr; + __be32 addr; struct ip_mc_socklist *pmc; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; @@ -2153,7 +2156,7 @@ done: /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; @@ -2202,18 +2205,18 @@ void ip_mc_drop_socket(struct sock *sk) struct in_device *in_dev; inet->mc_list = iml->next; - if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) { - (void) ip_mc_leave_src(sk, iml, in_dev); + in_dev = inetdev_by_index(iml->multi.imr_ifindex); + (void) ip_mc_leave_src(sk, iml, in_dev); + if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } sock_kfree_s(sk, iml, sizeof(*iml)); - } rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) +int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; @@ -2381,7 +2384,7 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct igmp_mc_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct igmp_mc_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -2391,7 +2394,6 @@ static int igmp_mc_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: @@ -2556,7 +2558,7 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct igmp_mcf_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct igmp_mcf_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -2566,7 +2568,6 @@ static int igmp_mcf_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9a01bb81f8bf..96bbe2a0aa1b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -13,7 +13,6 @@ * 2 of the License, or(at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/jhash.h> @@ -40,7 +39,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); + const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -53,7 +52,7 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; @@ -328,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -342,10 +342,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, +static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); + return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -356,8 +356,8 @@ static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, const __u32 raddr, - const __u32 laddr) + const __be16 rport, const __be32 raddr, + const __be32 laddr) { const struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; @@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 457db99c76df..77761ac4f7bb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -37,8 +36,8 @@ static const struct inet_diag_handler **inet_diag_table; struct inet_diag_entry { - u32 *saddr; - u32 *daddr; + __be32 *saddr; + __be32 *daddr; u16 sport; u16 dport; u16 family; @@ -295,7 +294,7 @@ out: return err; } -static int bitstring_match(const u32 *a1, const u32 *a2, int bits) +static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) { int words = bits >> 5; @@ -306,8 +305,8 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) return 0; } if (bits) { - __u32 w1, w2; - __u32 mask; + __be32 w1, w2; + __be32 mask; w1 = a1[words]; w2 = a2[words]; @@ -353,7 +352,7 @@ static int inet_diag_bc_run(const void *bc, int len, case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { struct inet_diag_hostcond *cond; - u32 *addr; + __be32 *addr; cond = (struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && @@ -910,11 +909,10 @@ static int __init inet_diag_init(void) sizeof(struct inet_diag_handler *)); int err = -ENOMEM; - inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); + inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL); if (!inet_diag_table) goto out; - memset(inet_diag_table, 0, inet_diag_table_size); idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ee9b5515b9ae..244c4f445c7d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -13,7 +13,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> @@ -125,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, - const unsigned short hnum, const int dif) +static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, + const __be32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -136,7 +137,7 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad const struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { - const __u32 rcv_saddr = inet->rcv_saddr; + const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; if (rcv_saddr) { @@ -160,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad return result; } +/* Optimize the common listener case. */ +struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const __be32 daddr, const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* called with local bh disabled */ @@ -169,11 +197,11 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; + __be32 daddr = inet->rcv_saddr; + __be32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 417f126c749e..cdd805344c61 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -8,7 +8,6 @@ * From code orinally in TCP */ -#include <linux/config.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2160874ce7aa..2b1a54b59c48 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty; static DEFINE_RWLOCK(peer_pool_lock); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static volatile int peer_total; +static int peer_total; /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold = 65536 + 128; /* start to throw entries more * aggressively at this stage */ @@ -126,12 +126,9 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!peer_cachep) - panic("cannot create inet_peer_cache"); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ @@ -166,7 +163,7 @@ static void unlink_from_unused(struct inet_peer *p) for (u = peer_root; u != peer_avl_empty; ) { \ if (daddr == u->v4daddr) \ break; \ - if (daddr < u->v4daddr) \ + if ((__force __u32)daddr < (__force __u32)u->v4daddr) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ @@ -371,7 +368,7 @@ static int cleanup_once(unsigned long ttl) } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__u32 daddr, int create) +struct inet_peer *inet_getpeer(__be32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 9f0bb529ab70..a22d11d2911c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -21,7 +21,6 @@ * Mike McLagan : Routing by source */ -#include <linux/config.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/sched.h> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index da734c439179..74046efdf875 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -23,7 +23,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> @@ -55,15 +54,15 @@ * even the most extreme cases without allowing an attacker to measurably * harm machine performance. */ -int sysctl_ipfrag_high_thresh = 256*1024; -int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; -int sysctl_ipfrag_max_dist = 64; +int sysctl_ipfrag_max_dist __read_mostly = 64; /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ -int sysctl_ipfrag_time = IP_FRAG_TIME; +int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; struct ipfrag_skb_cb { @@ -78,9 +77,9 @@ struct ipq { struct hlist_node list; struct list_head lru_list; /* lru list member */ u32 user; - u32 saddr; - u32 daddr; - u16 id; + __be32 saddr; + __be32 daddr; + __be16 id; u8 protocol; u8 last_in; #define COMPLETE 4 @@ -124,14 +123,15 @@ static __inline__ void ipq_unlink(struct ipq *ipq) write_unlock(&ipfrag_lock); } -static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) +static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { - return jhash_3words((u32)id << 16 | prot, saddr, daddr, + return jhash_3words((__force u32)id << 16 | prot, + (__force u32)saddr, (__force u32)daddr, ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; +int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; static void ipfrag_secret_rebuild(unsigned long dummy) { @@ -388,8 +388,8 @@ out_nomem: static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { __be16 id = iph->id; - __u32 saddr = iph->saddr; - __u32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + __be32 daddr = iph->daddr; __u8 protocol = iph->protocol; unsigned int hash; struct ipq *qp; @@ -666,7 +666,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ab99bebdcdc8..f5fba051df3d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -11,7 +11,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> @@ -394,7 +393,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; u16 flags; int grehlen = (iph->ihl<<2) + 4; struct sk_buff *skb2; @@ -423,14 +423,16 @@ out: default: return; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < (iph->ihl<<2)) return; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - grehlen; + n -= grehlen; + rel_info = htonl(n << 24); break; case ICMP_DEST_UNREACH: @@ -441,13 +443,14 @@ out: return; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < grehlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < grehlen+68) return; - rel_info -= grehlen; + n -= grehlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -509,12 +512,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { @@ -577,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: csum = (u16)csum_fold(skb->csum); if (!csum) break; @@ -585,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } offset += 4; } @@ -618,7 +620,6 @@ static int ipgre_rcv(struct sk_buff *skb) skb->mac.raw = skb->nh.raw; skb->nh.raw = __pskb_pull(skb, offset); skb_postpull_rcsum(skb, skb->h.raw, offset); - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST if (MULTICAST(iph->daddr)) { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c9026dbf4c93..212734ca238f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -121,7 +121,6 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/net.h> #include <linux/socket.h> @@ -429,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + /* Remove any debris in the socket control block */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index cbcae6544622..8dabbfc31267 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -24,6 +24,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> +#include <net/cipso_ipv4.h> /* * Write options to IP header, record destination address to @@ -37,7 +38,7 @@ */ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, - u32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt, int is_frag) { unsigned char * iph = skb->nh.raw; @@ -56,7 +57,7 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); if (opt->ts_needtime) { struct timeval tv; - __u32 midtime; + __be32 midtime; do_gettimeofday(&tv); midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); @@ -90,7 +91,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) unsigned char *sptr, *dptr; int soffset, doffset; int optlen; - u32 daddr; + __be32 daddr; memset(dopt, 0, sizeof(struct ip_options)); @@ -147,7 +148,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->ts_needtime = 0; if (soffset + 8 <= optlen) { - __u32 addr; + __be32 addr; memcpy(&addr, sptr+soffset-1, 4); if (inet_addr_type(addr) != RTN_LOCAL) { @@ -164,7 +165,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } if (sopt->srr) { unsigned char * start = sptr+sopt->srr; - u32 faddr; + __be32 faddr; optlen = start[1]; soffset = start[2]; @@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } + if (sopt->cipso) { + optlen = sptr[sopt->cipso+1]; + dopt->cipso = dopt->optlen+sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->cipso, optlen); + dptr += optlen; + dopt->optlen += optlen; + } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -256,7 +264,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (!opt) { opt = &(IPCB(skb)->opt); - memset(opt, 0, sizeof(struct ip_options)); iph = skb->nh.raw; opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); optptr = iph + sizeof(struct iphdr); @@ -355,7 +362,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) goto error; } if (optptr[2] <= optlen) { - __u32 * timeptr = NULL; + __be32 *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -364,7 +371,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__u32*)&optptr[optptr[2]-1]; + timeptr = (__be32*)&optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -376,7 +383,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) opt->ts = optptr - iph; if (skb) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__u32*)&optptr[optptr[2]+3]; + timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,12 +396,12 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) } opt->ts = optptr - iph; { - u32 addr; + __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); if (inet_addr_type(addr) == RTN_UNICAST) break; if (skb) - timeptr = (__u32*)&optptr[optptr[2]+3]; + timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -408,10 +415,10 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) } if (timeptr) { struct timeval tv; - __u32 midtime; + __be32 midtime; do_gettimeofday(&tv); midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); - memcpy(timeptr, &midtime, sizeof(__u32)); + memcpy(timeptr, &midtime, sizeof(__be32)); opt->is_changed = 1; } } else { @@ -435,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; + case IPOPT_CIPSO: + if (opt->cipso) { + pp_ptr = optptr; + goto error; + } + opt->cipso = optptr - iph; + if (cipso_v4_validate(&optptr)) { + pp_ptr = optptr; + goto error; + } + break; case IPOPT_SEC: case IPOPT_SID: default: @@ -507,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp, opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; opt->is_data = 1; - opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { kfree(opt); return -EINVAL; @@ -590,7 +607,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; - u32 nexthop; + __be32 nexthop; struct iphdr *iph = skb->nh.iph; unsigned char * optptr = skb->nh.raw + opt->srr; struct rtable *rt = (struct rtable*)skb->dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7624fd1d8f9f..fc195a44fc2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -53,7 +53,6 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -84,7 +83,7 @@ #include <linux/netlink.h> #include <linux/tcp.h> -int sysctl_ip_default_ttl = IPDEFTTL; +int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) @@ -119,7 +118,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * */ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - u32 saddr, u32 daddr, struct ip_options *opt) + __be32 saddr, __be32 daddr, struct ip_options *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)skb->dst; @@ -210,7 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) + if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -307,7 +306,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); if (rt == NULL) { - u32 daddr; + __be32 daddr; /* Use correct destination address if we have options. */ daddr = inet->daddr; @@ -329,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. */ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -426,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs; + unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; @@ -441,6 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) iph = skb->nh.iph; if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { + IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dst_mtu(&rt->u.dst))); kfree_skb(skb); @@ -527,6 +528,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); + if (!err) + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -553,14 +556,13 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ -#ifdef CONFIG_BRIDGE_NETFILTER /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); - mtu -= nf_bridge_pad(skb); -#else - ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); -#endif + * we need to make room for the encapsulating header + */ + pad = nf_bridge_pad(skb); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + mtu -= pad; + /* * Fragment the datagram. */ @@ -650,9 +652,6 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); - iph->tot_len = htons(len + hlen); ip_send_check(iph); @@ -660,6 +659,8 @@ slow_path: err = output(skb2); if (err) goto fail; + + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -678,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk { struct iovec *iov = from; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { @@ -734,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -744,7 +745,7 @@ static inline int ip_ufo_append_data(struct sock *sk, if (!err) { /* specify the length of each IP datagram fragment*/ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(&sk->sk_write_queue, skb); return 0; @@ -842,7 +843,7 @@ int ip_append_data(struct sock *sk, length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) - csummode = CHECKSUM_HW; + csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && @@ -947,7 +948,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; @@ -1089,14 +1090,14 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if ((sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } while (size > 0) { int i; - if (skb_shinfo(skb)->gso_size) + if (skb_is_gso(skb)) len = size; else { @@ -1142,7 +1143,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, data, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } /* @@ -1339,7 +1340,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar char data[40]; } replyopts; struct ipcm_cookie ipc; - u32 daddr; + __be32 daddr; struct rtable *rt = (struct rtable*)skb->dst; if (ip_options_echo(&replyopts.opt, skb)) @@ -1365,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 12e0bf19f24a..4b132953bcc2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -17,7 +17,6 @@ * Mike McLagan : Routing by source */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> @@ -113,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; - u32 seclen; + u32 seclen, secid; int err; - err = security_socket_getpeersec_dgram(skb, &secdata, &seclen); + err = security_socket_getpeersec_dgram(NULL, skb, &secid); + if (err) + return; + + err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); } @@ -250,7 +254,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s } void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - u16 port, u32 info, u8 *payload) + __be16 port, u32 info, u8 *payload) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; @@ -279,7 +283,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info) +void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 3ed8b57a1002..2017d36024d4 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -13,7 +13,6 @@ * - Compression stats. * - Adaptive compression. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/scatterlist.h> #include <asm/semaphore.h> @@ -33,7 +32,7 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -47,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) int err, plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; plen = skb->len; @@ -71,7 +70,8 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (err) goto out; - skb_put(skb, dlen - plen); + skb->truesize += dlen - plen; + __skb_put(skb, dlen - plen); memcpy(skb->data, scratch, dlen); out: put_cpu(); @@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; ihlen = iph->ihl * 4; @@ -176,14 +176,14 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } static void ipcomp4_err(struct sk_buff *skb, u32 info) { - u32 spi; + __be32 spi; struct iphdr *iph = (struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_tfm **tfms) +static void ipcomp_free_tfms(struct crypto_comp **tfms) { struct ipcomp_tfms *pos; int cpu; @@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; @@ -410,13 +411,12 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; err = -ENOMEM; - ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL); + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); if (!ipcd) goto out; - memset(ipcd, 0, sizeof(*ipcd)); x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cb8a92f18ef6..1fbb38415b19 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -31,7 +31,6 @@ * -- Josef Siemes <jsiemes@web.de>, Aug 2002 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ea398ee43f28..0c4556529228 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -94,7 +94,6 @@ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> @@ -342,7 +341,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; struct sk_buff *skb2; struct flowi fl; struct rtable *rt; @@ -355,14 +355,15 @@ out: default: return 0; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < hlen) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < hlen) return 0; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - hlen; + rel_info = htonl((n - hlen) << 24); break; case ICMP_DEST_UNREACH: @@ -373,13 +374,14 @@ out: return 0; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < hlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < hlen+68) return 0; - rel_info -= hlen; + n -= hlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return 0; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -441,12 +443,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return 0; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { @@ -488,7 +489,6 @@ static int ipip_rcv(struct sk_buff *skb) skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 717ab7d6d7b6..97cfa97c8abb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -28,7 +28,6 @@ * */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> @@ -313,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e = NLMSG_DATA(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -462,7 +462,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return 0; } -static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) +static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) { int line=MFC_HASH(mcastgrp,origin); struct mfc_cache *c; @@ -513,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (skb->nh.iph->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { @@ -526,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } @@ -1097,7 +1097,7 @@ static struct notifier_block ip_mr_notifier={ * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) +static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); @@ -1462,7 +1462,6 @@ int pim_rcv_v1(struct sk_buff * skb) skb_pull(skb, (u8*)encap - skb->data); skb->nh.iph = (struct iphdr *)skb->data; skb->dev = reg_dev; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); skb->protocol = htons(ETH_P_IP); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; @@ -1518,7 +1517,6 @@ static int pim_rcv(struct sk_buff * skb) skb_pull(skb, (u8*)encap - skb->data); skb->nh.iph = (struct iphdr *)skb->data; skb->dev = reg_dev; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); skb->protocol = htons(ETH_P_IP); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; @@ -1581,6 +1579,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); if (cache==NULL) { + struct sk_buff *skb2; struct net_device *dev; int vif; @@ -1594,12 +1593,18 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) read_unlock(&mrt_lock); return -ENODEV; } - skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); - skb->nh.iph->ihl = sizeof(struct iphdr)>>2; - skb->nh.iph->saddr = rt->rt_src; - skb->nh.iph->daddr = rt->rt_dst; - skb->nh.iph->version = 0; - err = ipmr_cache_unresolved(vif, skb); + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) { + read_unlock(&mrt_lock); + return -ENOMEM; + } + + skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr)); + skb2->nh.iph->ihl = sizeof(struct iphdr)>>2; + skb2->nh.iph->saddr = rt->rt_src; + skb2->nh.iph->daddr = rt->rt_dst; + skb2->nh.iph->version = 0; + err = ipmr_cache_unresolved(vif, skb2); read_unlock(&mrt_lock); return err; } @@ -1895,11 +1900,8 @@ void __init ip_mr_init(void) { mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!mrt_cachep) - panic("cannot allocate ip_mrt_cache"); - init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 87b83813cf2c..8832eb517d52 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -115,9 +115,9 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port) +static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) { - return jhash_3words(addr, port, proto, ip_vs_conn_rnd) + return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -188,7 +188,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -215,7 +215,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { struct ip_vs_conn *cp; @@ -234,7 +234,7 @@ struct ip_vs_conn *ip_vs_conn_in_get /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -274,7 +274,7 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -324,7 +324,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) /* * Fill a no_client_port connection with a client port number */ -void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport) +void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { spin_lock(&cp->lock); @@ -508,10 +508,10 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->vport != 65535) { + if (ct->vport != htons(0xffff)) { if (ip_vs_conn_unhash(ct)) { - ct->dport = 65535; - ct->vport = 65535; + ct->dport = htons(0xffff); + ct->vport = htons(0xffff); ct->cport = 0; ip_vs_conn_hash(ct); } @@ -596,8 +596,8 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport, - __u32 daddr, __u16 dport, unsigned flags, +ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, + __be32 daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3f47ad8e1cad..6dee03935f78 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, - __u16 ports[2]) + __be16 ports[2]) { struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __u16 dport; /* destination port to forward */ - __u32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + __be32 snet; /* source network of the client, after masking */ /* Mask saddr with the netmask to adjust template granularity */ snet = iph->saddr & svc->netmask; @@ -383,7 +383,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports); @@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; struct iphdr *iph = skb->nh.iph; pptr = skb_header_pointer(skb, iph->ihl*4, @@ -576,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, /* the TCP/UDP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { - __u16 *ports = (void *)ciph + ciph->ihl*4; + __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; @@ -775,7 +775,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, ihl, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index f28ec6882162..f261616e4602 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -283,7 +283,7 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port) +ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -365,7 +365,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {proto,addr,port} in the service table. */ static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport) +__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; @@ -410,7 +410,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) +ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -480,7 +480,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port) +static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -531,7 +531,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by <proto,addr,port> in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) +ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -562,7 +562,7 @@ ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest; @@ -591,7 +591,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -735,12 +735,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; - dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); + dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); if (dest == NULL) { IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n"); return -ENOMEM; } - memset(dest, 0, sizeof(struct ip_vs_dest)); dest->protocol = svc->protocol; dest->vaddr = svc->addr; @@ -774,8 +773,8 @@ static int ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; int ret; EnterFunction(2); @@ -880,8 +879,8 @@ static int ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); @@ -992,8 +991,8 @@ static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); @@ -1050,14 +1049,12 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) goto out_mod_dec; } - svc = (struct ip_vs_service *) - kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); + svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); if (svc == NULL) { IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); ret = -ENOMEM; goto out_err; } - memset(svc, 0, sizeof(struct ip_vs_service)); /* I'm the first user of the service */ atomic_set(&svc->usecnt, 1); @@ -1797,7 +1794,7 @@ static int ip_vs_info_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct ip_vs_iter *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -1808,7 +1805,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 9fee19c4c617..502111fba872 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -66,7 +66,7 @@ struct ip_vs_dh_bucket { /* * Returns hash value for IPVS DH entry */ -static inline unsigned ip_vs_dh_hashkey(__u32 addr) +static inline unsigned ip_vs_dh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; } @@ -76,7 +76,7 @@ static inline unsigned ip_vs_dh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr) +ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_dh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index c453e1e57f4b..7d68b80c4c19 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -13,7 +13,6 @@ * Changes: * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/slab.h> @@ -124,11 +123,10 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est; - est = kmalloc(sizeof(*est), GFP_KERNEL); + est = kzalloc(sizeof(*est), GFP_KERNEL); if (est == NULL) return -ENOMEM; - memset(est, 0, sizeof(*est)); est->stats = stats; est->last_conns = stats->conns; est->cps = stats->cps<<10; diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index a19a33ceb811..e433cb0ff894 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -32,6 +32,7 @@ #include <linux/ip.h> #include <net/protocol.h> #include <net/tcp.h> +#include <asm/unaligned.h> #include <net/ip_vs.h> @@ -44,16 +45,9 @@ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper * First port is set to the default port. */ -static int ports[IP_VS_APP_MAX_PORTS] = {21, 0}; -module_param_array(ports, int, NULL, 0); - -/* - * Debug level - */ -#ifdef CONFIG_IP_VS_DEBUG -static int debug=0; -module_param(debug, int, 0); -#endif +static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0}; +module_param_array(ports, ushort, NULL, 0); +MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); /* Dummy variable */ @@ -81,7 +75,7 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, const char *pattern, size_t plen, char term, - __u32 *addr, __u16 *port, + __be32 *addr, __be16 *port, char **start, char **end) { unsigned char p[6]; @@ -121,8 +115,8 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (i != 5) return -1; - *addr = (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0]; - *port = (p[5]<<8) | p[4]; + *addr = get_unaligned((__be32 *)p); + *port = get_unaligned((__be16 *)(p + 4)); return 1; } @@ -147,8 +141,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __u32 from; - __u16 port; + __be32 from; + __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; @@ -177,7 +171,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> " + IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); @@ -206,7 +200,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, from = n_cp->vaddr; port = n_cp->vport; sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), - port&255, (port>>8)&255); + ntohs(port)&255, (ntohs(port)>>8)&255); buf_len = strlen(buf); /* @@ -250,8 +244,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __u32 to; - __u16 port; + __be32 to; + __be16 port; struct ip_vs_conn *n_cp; /* no diff required for incoming packets */ @@ -280,7 +274,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { /* Passive mode on */ - IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n", + IP_VS_DBG(7, "got PASV at %zd of %zd\n", data - data_start, data_limit - data_start); cp->app_data = &ip_vs_ftp_pasv; @@ -302,7 +296,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, &start, &end) != 1) return 1; - IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n", + IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", NIPQUAD(to), ntohs(port)); /* Passive mode off */ @@ -311,7 +305,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Now update or create a connection entry for it */ - IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", + IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); @@ -375,8 +369,8 @@ static int __init ip_vs_ftp_init(void) ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); if (ret) break; - IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n", - app->name, i, ports[i]); + IP_VS_INFO("%s: loaded support on port[%d] = %d\n", + app->name, i, ports[i]); } if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 6e5cb92a5c83..524751e031de 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -87,7 +87,7 @@ static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; */ struct ip_vs_lblc_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ }; @@ -160,7 +160,7 @@ static struct ctl_table_header * sysctl_header; * IP address to a server. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest) +ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) { struct ip_vs_lblc_entry *en; @@ -195,7 +195,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) /* * Returns hash value for IPVS LBLC entry */ -static inline unsigned ip_vs_lblc_hashkey(__u32 addr) +static inline unsigned ip_vs_lblc_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK; } @@ -234,7 +234,7 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) * Get ip_vs_lblc_entry associated with supplied parameters. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr) +ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblc_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 32ba37ba72d8..08990192b6ec 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -276,7 +276,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) */ struct ip_vs_lblcr_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ }; @@ -348,7 +348,7 @@ static struct ctl_table_header * sysctl_header; * new/free a ip_vs_lblcr_entry, which is a mapping of a destination * IP address to a server. */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr) +static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) { struct ip_vs_lblcr_entry *en; @@ -381,7 +381,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) /* * Returns hash value for IPVS LBLCR entry */ -static inline unsigned ip_vs_lblcr_hashkey(__u32 addr) +static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; } @@ -420,7 +420,7 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) * Get ip_vs_lblcr_entry associated with supplied parameters. */ static inline struct ip_vs_lblcr_entry * -ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr) +ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblcr_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 867d4e9c6594..c4528b5c800d 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -176,7 +176,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, pp->name, NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); else { - __u16 _ports[2], *pptr + __be16 _ports[2], *pptr ; pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bc28b1160a3a..bfe779e74590 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -29,7 +29,7 @@ static struct ip_vs_conn * tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -50,7 +50,7 @@ static struct ip_vs_conn * tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -112,12 +112,12 @@ tcp_conn_schedule(struct sk_buff *skb, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip, - u16 oldport, u16 newport) +tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { tcph->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, tcph->check)); } @@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - tcphoff, skb->nh.iph->protocol, skb->csum)) { @@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 89d9175d8f28..54aa7603591f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -54,7 +54,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ports), _ports); @@ -117,15 +117,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip, - u16 oldport, u16 newport) +udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { uhdr->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, uhdr->check)); if (!uhdr->check) - uhdr->check = 0xFFFF; + uhdr->check = htonl(0xFFFF); } static int @@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = 0xFFFF; + udph->check = htonl(0xFFFF); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); @@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - udphoff, @@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 7775e6cc68be..338668f88fe2 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -63,7 +63,7 @@ struct ip_vs_sh_bucket { /* * Returns hash value for IPVS SH entry */ -static inline unsigned ip_vs_sh_hashkey(__u32 addr) +static inline unsigned ip_vs_sh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; } @@ -73,7 +73,7 @@ static inline unsigned ip_vs_sh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr) +ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_sh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 1bca714bda3d..6ab57d72b615 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -48,16 +48,16 @@ struct ip_vs_sync_conn { /* Protocol, addresses and port numbers */ __u8 protocol; /* Which protocol (TCP/UDP) */ - __u16 cport; - __u16 vport; - __u16 dport; - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ + __be16 cport; + __be16 vport; + __be16 dport; + __be32 caddr; /* client address */ + __be32 vaddr; /* virtual address */ + __be32 daddr; /* destination address */ /* Flags and state transition */ - __u16 flags; /* status flags */ - __u16 state; /* state info */ + __be16 flags; /* status flags */ + __be16 state; /* state info */ /* The sequence options start here */ }; @@ -464,7 +464,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { struct net_device *dev; - u32 addr; + __be32 addr; struct sockaddr_in sin; if ((dev = __dev_get_by_name(ifname)) == NULL) diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 52c12e9edbbc..e1f77bd7c9a5 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -232,7 +232,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { - __u16 _pt, *p; + __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index db67373f9b34..252e837b17a5 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c index 5249dbe7c559..b8c289f247cb 100644 --- a/net/ipv4/multipath_random.c +++ b/net/ipv4/multipath_random.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c index b6cd2870478f..bba5abe5542d 100644 --- a/net/ipv4/multipath_rr.c +++ b/net/ipv4/multipath_rr.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index 342d0b9098f5..92b04823e034 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> @@ -61,8 +60,8 @@ struct multipath_dest { struct list_head list; const struct fib_nh *nh_info; - __u32 netmask; - __u32 network; + __be32 netmask; + __be32 network; unsigned char prefixlen; struct rcu_head rcu; @@ -77,7 +76,7 @@ struct multipath_route { struct list_head list; int oif; - __u32 gw; + __be32 gw; struct list_head dests; struct rcu_head rcu; @@ -129,8 +128,8 @@ static unsigned char __multipath_lookup_weight(const struct flowi *fl, /* find state entry for destination */ list_for_each_entry_rcu(d, &target_route->dests, list) { - __u32 targetnetwork = fl->fl4_dst & - (0xFFFFFFFF >> (32 - d->prefixlen)); + __be32 targetnetwork = fl->fl4_dst & + inet_make_mask(d->prefixlen); if ((targetnetwork & d->netmask) == d->network) { weight = d->nh_info->nh_weight; @@ -218,8 +217,8 @@ static void wrandom_select_route(const struct flowi *flp, *rp = decision; } -static void wrandom_set_nhinfo(__u32 network, - __u32 netmask, +static void wrandom_set_nhinfo(__be32 network, + __be32 netmask, unsigned char prefixlen, const struct fib_nh *nh) { diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6a9e34b794bc..5ac15379a0cf 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -128,8 +128,8 @@ EXPORT_SYMBOL(ip_nat_decode_session); */ struct ip_rt_info { - u_int32_t daddr; - u_int32_t saddr; + __be32 daddr; + __be32 saddr; u_int8_t tos; }; @@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e1d7f5fbc526..a55b8ff70ded 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_DSCP - tristate "DSCP match support" - depends on IP_NF_IPTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_AH tristate "AH match support" depends on IP_NF_IPTABLES @@ -332,7 +321,7 @@ config IP_NF_MATCH_HASHLIMIT help This option adds a new iptables `hashlimit' match. - As opposed to `limit', this match dynamically crates a hash table + As opposed to `limit', this match dynamically creates a hash table of limit buckets, based on your selection of source/destination ip addresses and/or ports. @@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_DSCP - tristate "DSCP target support" - depends on IP_NF_MANGLE - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_TTL tristate 'TTL target support' depends on IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3ded4a3af59c..09aaed1a8063 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o @@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d0d19192026d..17e1a687ab45 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -57,8 +56,6 @@ do { \ #define ARP_NF_ASSERT(x) #endif -#include <linux/netfilter_ipv4/listhelp.h> - static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { @@ -83,7 +80,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, { char *arpptr = (char *)(arphdr + 1); char *src_devaddr, *tgt_devaddr; - u32 src_ipaddr, tgt_ipaddr; + __be32 src_ipaddr, tgt_ipaddr; int i, ret; #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) @@ -209,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", (char *)targinfo); @@ -227,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata) + struct arpt_table *table) { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -237,7 +232,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + @@ -249,6 +244,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, outdev = out ? out->name : nulldevname; read_lock_bh(&table->lock); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); /* Target might have changed stuff. */ arp = (*pskb)->nh.arph; @@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto out; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) t = arpt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1120,7 +1112,8 @@ int arpt_register_table(struct arpt_table *table, return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } @@ -1170,21 +1163,34 @@ static int __init arp_tables_init(void) { int ret; - xt_proto_init(NF_ARP); + ret = xt_proto_init(NF_ARP); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&arpt_standard_target); - xt_register_target(&arpt_error_target); + ret = xt_register_target(&arpt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&arpt_error_target); + if (ret < 0) + goto err3; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err4; printk("arp_tables: (C) 2002 David S. Miller\n"); return 0; + +err4: + xt_unregister_target(&arpt_error_target); +err3: + xt_unregister_target(&arpt_standard_target); +err2: + xt_proto_fini(NF_ARP); +err1: + return ret; } static void __exit arp_tables_fini(void) diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a58325c1ceb9..d12b1df252a1 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -11,7 +11,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { const struct arpt_mangle *mangle = targinfo; struct arphdr *arp; @@ -67,7 +67,7 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int targinfosize, unsigned int hook_mask) + void *targinfo, unsigned int hook_mask) { const struct arpt_mangle *mangle = targinfo; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d7c472faa53b..7edea2a1696c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return arpt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 0a7bd7f04061..6c7383a8e42b 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -155,11 +155,11 @@ static int help(struct sk_buff **pskb, exp->tuple.dst.protonum = IPPROTO_TCP; exp->tuple.dst.u.tcp.port = htons(port); - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); exp->mask.dst.protonum = 0xFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.u.tcp.port = htons(0xFFFF); if (ip_nat_amanda_hook) ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 7e4cf9a4d15f..143c4668538b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -17,7 +17,6 @@ * - export ip_conntrack[_expect]_{find_get,put} functions * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/icmp.h> #include <linux/ip.h> @@ -48,7 +47,6 @@ #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> #define IP_CONNTRACK_VERSION "2.4" @@ -65,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size = 0; -int ip_conntrack_max; -struct list_head *ip_conntrack_hash; +unsigned int ip_conntrack_htable_size __read_mostly = 0; +int ip_conntrack_max __read_mostly; +struct list_head *ip_conntrack_hash __read_mostly; static kmem_cache_t *ip_conntrack_cachep __read_mostly; static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid; +unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc; +static int ip_conntrack_vmalloc __read_mostly; static unsigned int ip_conntrack_next_id; static unsigned int ip_conntrack_expect_next_id; @@ -151,8 +149,8 @@ static unsigned int ip_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, unsigned int size, unsigned int rnd) { - return (jhash_3words(tuple->src.ip, - (tuple->dst.ip ^ tuple->dst.protonum), + return (jhash_3words((__force u32)tuple->src.ip, + ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), rnd) % size); } @@ -295,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -314,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; + struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -322,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + helper = ct->helper; + if (helper && helper->destroy) + helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ @@ -368,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -387,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (tuplehash_to_ctrack(h) != ignored_conntrack && + ip_ct_tuple_equal(tuple, &h->tuple)) { CONNTRACK_STAT_INC(found); return h; } @@ -418,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -441,6 +430,7 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -471,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &ip_conntrack_hash[hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + return NF_ACCEPT; +out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); - return NF_DROP; } @@ -528,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&ip_conntrack_lock); @@ -560,18 +548,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct ip_conntrack_helper * @@ -641,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, ip_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&ip_conntrack_count); + if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&ip_conntrack_hash[hash])) { + atomic_dec(&ip_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -657,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); + atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -670,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - atomic_inc(&ip_conntrack_count); - return conntrack; } @@ -1063,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1082,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline void unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } - return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1109,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < ip_conntrack_htable_size; i++) { + list_for_each_entry(h, &ip_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1178,9 +1169,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16), &tuple->dst.u.tcp.port); return 0; @@ -1195,9 +1186,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], return -EINVAL; t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0; } @@ -1238,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - /* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * +static struct ip_conntrack * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + list_for_each_entry(h, &unconfirmed, list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); + return NULL; - return h; +found: + atomic_inc(&ct->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); + return ct; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { - struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 4dcf526c3944..93dcf960662f 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -426,8 +425,8 @@ static int help(struct sk_buff **pskb, exp->tuple.src.u.tcp.port = 0; /* Don't care. */ exp->tuple.dst.protonum = IPPROTO_TCP; exp->mask = ((struct ip_conntrack_tuple) - { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { { htonl(0xFFFFFFFF), { 0 } }, + { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; @@ -489,7 +488,7 @@ static int __init ip_conntrack_ftp_init(void) for (i = 0; i < ports_c; i++) { ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].tuple.dst.protonum = IPPROTO_TCP; - ftp[i].mask.src.u.tcp.port = 0xFFFF; + ftp[i].mask.src.u.tcp.port = htons(0xFFFF); ftp[i].mask.dst.protonum = 0xFF; ftp[i].max_expected = 1; ftp[i].timeout = 5 * 60; /* 5 minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c index 0665674218c6..7b7441202bfd 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c @@ -11,7 +11,6 @@ * For more information, please see http://nath323.sourceforge.net/ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -50,11 +49,11 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " int (*set_h245_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_h225_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_sig_addr_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, @@ -210,7 +209,7 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct, /****************************************************************************/ static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -233,7 +232,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; u_int16_t rtp_port; struct ip_conntrack_expect *rtp_exp; @@ -255,10 +254,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtp_exp->tuple.dst.u.udp.port = htons(rtp_port); rtp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtp_exp->mask.src.ip = 0xFFFFFFFF; + rtp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtp_exp->mask.src.u.udp.port = 0; - rtp_exp->mask.dst.ip = 0xFFFFFFFF; - rtp_exp->mask.dst.u.udp.port = 0xFFFF; + rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtp_exp->mask.dst.protonum = 0xFF; rtp_exp->flags = 0; @@ -272,10 +271,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1); rtcp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtcp_exp->mask.src.ip = 0xFFFFFFFF; + rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtcp_exp->mask.src.u.udp.port = 0; - rtcp_exp->mask.dst.ip = 0xFFFFFFFF; - rtcp_exp->mask.dst.u.udp.port = 0xFFFF; + rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtcp_exp->mask.dst.protonum = 0xFF; rtcp_exp->flags = 0; @@ -326,7 +325,7 @@ static int expect_t120(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -343,10 +342,10 @@ static int expect_t120(struct sk_buff **pskb, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */ @@ -627,7 +626,7 @@ void ip_conntrack_h245_expect(struct ip_conntrack *new, /****************************************************************************/ int get_h225_addr(unsigned char *data, TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -649,7 +648,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -666,10 +665,10 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -710,7 +709,7 @@ static int expect_callforwarding(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -752,10 +751,10 @@ static int expect_callforwarding(struct sk_buff **pskb, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -792,7 +791,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_q931: Setup\n"); @@ -1189,7 +1188,7 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen) /****************************************************************************/ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { struct ip_conntrack_expect *exp; struct ip_conntrack_tuple tuple; @@ -1201,7 +1200,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct, tuple.dst.protonum = IPPROTO_TCP; exp = __ip_conntrack_expect_find(&tuple); - if (exp->master == ct) + if (exp && exp->master == ct) return exp; return NULL; } @@ -1229,7 +1228,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1252,10 +1251,10 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = gkrouted_only ? 0xFFFFFFFF : 0; + exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0; exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */ @@ -1308,7 +1307,7 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1334,10 +1333,10 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; exp->expectfn = ip_conntrack_ras_expect; @@ -1478,7 +1477,7 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct, { struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_ras: ARQ\n"); @@ -1514,7 +1513,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1539,10 +1538,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; @@ -1582,7 +1581,7 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -1599,10 +1598,10 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 8ccfe17bb253..a2af5e0c7f99 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -20,11 +20,11 @@ * - We can only support one single call within each session * * TODO: - * - testing of incoming PPTP calls + * - testing of incoming PPTP calls * - * Changes: + * Changes: * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from + * - Call ip_conntrack_unexpect_related() from * pptp_destroy_siblings() to destroy expectations in case * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen * (Philip Craig <philipc@snapgear.com>) @@ -46,7 +46,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -81,7 +80,7 @@ int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -int +void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply); @@ -142,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, invert_tuplepr(&inv_t, &exp->tuple); DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - + exp_other = ip_conntrack_expect_find(&inv_t); if (exp_other) { /* delete other expectation. */ @@ -195,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; - /* Since ct->sibling_list has literally rusted away in 2.6.11, + ip_ct_gre_keymap_destroy(ct); + /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... -HW */ /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout original pns->pac ct/exp\n"); @@ -211,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout reply pac->pns ct/exp\n"); @@ -220,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int -exp_gre(struct ip_conntrack *master, - u_int32_t seq, +exp_gre(struct ip_conntrack *ct, __be16 callid, __be16 peer_callid) { - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = peer_callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = callid } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = peer_callid } }, - .protonum = IPPROTO_GRE - }, - } - }; struct ip_conntrack_expect *exp_orig, *exp_reply; int ret = 1; - exp_orig = ip_conntrack_expect_alloc(master); + exp_orig = ip_conntrack_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = ip_conntrack_expect_alloc(master); + exp_reply = ip_conntrack_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; - memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + /* original direction, PNS->PAC */ + exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp_orig->tuple.src.u.gre.key = peer_callid; + exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp_orig->tuple.dst.u.gre.key = callid; + exp_orig->tuple.dst.protonum = IPPROTO_GRE; - exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.ip = htonl(0xffffffff); exp_orig->mask.src.u.all = 0; - exp_orig->mask.dst.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); - exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.ip = htonl(0xffffffff); exp_orig->mask.dst.protonum = 0xff; - - exp_orig->master = master; + + exp_orig->master = ct; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); - if (ip_nat_pptp_hook_exp_gre) - ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - else { - - DEBUGP("calling expect_related PNS->PAC"); - DUMP_TUPLE(&exp_orig->tuple); - - if (ip_conntrack_expect_related(exp_orig) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_put_both; - } + /* reply direction, PAC->PNS */ + exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp_reply->tuple.src.u.gre.key = callid; + exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp_reply->tuple.dst.u.gre.key = peer_callid; + exp_reply->tuple.dst.protonum = IPPROTO_GRE; - DEBUGP("calling expect_related PAC->PNS"); - DUMP_TUPLE(&exp_reply->tuple); - - if (ip_conntrack_expect_related(exp_reply) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_unexpect_orig; - } - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { - DEBUGP("cannot keymap_add() exp\n"); - goto out_unexpect_both; - } - - invert_tuplepr(&inv_tuple, &exp_reply->tuple); - if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(master); - DEBUGP("cannot keymap_add() exp_inv\n"); - goto out_unexpect_both; - } - ret = 0; + if (ip_nat_pptp_hook_exp_gre) + ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + if (ip_conntrack_expect_related(exp_orig) != 0) + goto out_put_both; + if (ip_conntrack_expect_related(exp_reply) != 0) + goto out_unexpect_orig; + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) + goto out_unexpect_both; + if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(ct); + goto out_unexpect_both; } + ret = 0; out_put_both: ip_conntrack_expect_put(exp_reply); @@ -323,73 +292,36 @@ out_unexpect_orig: goto out_put_both; } -static inline int +static inline int pptp_inbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; - u_int32_t seq; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.srep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) { - DEBUGP("%s without START_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate < PPTP_SESSION_REQUESTED) + goto invalid; if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; - else + else info->sstate = PPTP_SESSION_ERROR; break; case PPTP_STOP_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.strep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) { - DEBUGP("%s without STOP_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate > PPTP_SESSION_STOPREQ) + goto invalid; if (pptpReq->strep.resultCode == PPTP_STOP_OK) info->sstate = PPTP_SESSION_NONE; else @@ -397,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.ocack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) { - DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); - break; - } - if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate != PPTP_CALL_OUT_CONF) + goto invalid; + + cid = pptpReq->ocack.callID; + pcid = pptpReq->ocack.peersCallID; + if (info->pns_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_OUT_CONF; + info->pac_call_id = cid; + exp_gre(ct, cid, pcid); + } else info->cstate = PPTP_CALL_NONE; - break; - } - - cid = &pptpReq->ocack.callID; - pcid = &pptpReq->ocack.peersCallID; - - info->pac_call_id = ntohs(*cid); - - if (htons(info->pns_call_id) != *pcid) { - DEBUGP("%s for unknown callid %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(*cid), ntohs(*pcid)); - - info->cstate = PPTP_CALL_OUT_CONF; - - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); break; case PPTP_IN_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - pcid = &pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + + cid = pptpReq->icreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); + info->pac_call_id = cid; break; case PPTP_IN_CALL_CONNECT: - if (reqlen < sizeof(_pptpReq.iccon)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - if (info->cstate != PPTP_CALL_IN_REP - && info->cstate != PPTP_CALL_IN_CONF) { - DEBUGP("%s but never sent IN_CALL_REPLY\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + if (info->cstate != PPTP_CALL_IN_REP && + info->cstate != PPTP_CALL_IN_CONF) + goto invalid; - pcid = &pptpReq->iccon.peersCallID; - cid = &info->pac_call_id; + pcid = pptpReq->iccon.peersCallID; + cid = info->pac_call_id; - if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } + if (info->pns_call_id != pcid) + goto invalid; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - + exp_gre(ct, cid, pcid); break; case PPTP_CALL_DISCONNECT_NOTIFY: - if (reqlen < sizeof(_pptpReq.disc)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms disconnect */ - cid = &pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + cid = pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -514,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_WAN_ERROR_NOTIFY: - break; - case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) - ? pptp_msg_name[msg]:pptp_msg_name[0], msg); - break; + goto invalid; } - if (ip_nat_pptp_hook_inbound) return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, pptpReq); - return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + return NF_ACCEPT; } static inline int pptp_outbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); @@ -569,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) { - DEBUGP("%s but we already have one", - pptp_msg_name[msg]); - } + if (info->sstate != PPTP_SESSION_NONE) + goto invalid; info->sstate = PPTP_SESSION_REQUESTED; break; case PPTP_STOP_SESSION_REQUEST: @@ -581,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.ocreq)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - /* FIXME: break; */ - } - /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ - cid = &pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); + cid = pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->pns_call_id = cid; break; case PPTP_IN_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ - && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", - pptp_msg_name[msg]); - break; - } - if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + if (info->cstate != PPTP_CALL_IN_REQ && + info->cstate != PPTP_CALL_IN_REP) + goto invalid; + + cid = pptpReq->icack.callID; + pcid = pptpReq->icack.peersCallID; + if (info->pac_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = cid; + } else info->cstate = PPTP_CALL_NONE; - break; - } - pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); break; case PPTP_CALL_CLEAR_REQUEST: /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("CLEAR_CALL but no session\n"); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; /* FUTURE: iterate over all calls and check if * call ID is valid. We don't do this without newnat, * because we only know about last call */ info->cstate = PPTP_CALL_CLEAR_REQ; break; case PPTP_SET_LINK_INFO: - break; case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0], msg); - /* unknown: no need to create GRE masq table entry */ - break; + goto invalid; } - + if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } +static const unsigned int pptp_msg_size[] = { + [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), + [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), + [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), + [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), + [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), + [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), + [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), + [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), + [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), + [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), + [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), + [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), + [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), +}; /* track caller id inside control connection, call expect_related */ -static int +static int conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct pptp_pkt_hdr _pptph, *pptph; - struct tcphdr _tcph, *tcph; - u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; - u_int32_t datalen; int dir = CTINFO2DIR(ctinfo); struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - unsigned int nexthdr_off; - + struct tcphdr _tcph, *tcph; + struct pptp_pkt_hdr _pptph, *pptph; + struct PptpControlHeader _ctlh, *ctlh; + union pptp_ctrl_union _pptpReq, *pptpReq; + unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; + u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { DEBUGP("ctinfo = %u, skipping\n", ctinfo); return NF_ACCEPT; } - + nexthdr_off = (*pskb)->nh.iph->ihl*4; tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - } - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { DEBUGP("no full PPTP header, can't track\n"); @@ -713,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb, return NF_ACCEPT; } + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + msg = ntohs(ctlh->messageType); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + oldsstate = info->sstate; oldcstate = info->cstate; @@ -722,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); DEBUGP("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); @@ -736,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb, } /* control protocol helper */ -static struct ip_conntrack_helper pptp = { +static struct ip_conntrack_helper pptp = { .list = { NULL, NULL }, - .name = "pptp", + .name = "pptp", .me = THIS_MODULE, .max_expected = 2, .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, .protonum = IPPROTO_TCP - } + } }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, - .protonum = 0xff - } + .protonum = 0xff + } }, - .help = conntrack_pptp_help + .help = conntrack_pptp_help, + .destroy = pptp_destroy_siblings, }; extern void ip_ct_proto_gre_fini(void); @@ -769,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void); static int __init ip_conntrack_helper_pptp_init(void) { int retcode; - + retcode = ip_ct_proto_gre_init(); if (retcode < 0) return retcode; diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index a2ac5ce544b2..75f7c3db1619 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -22,7 +22,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -219,7 +218,8 @@ static int help(struct sk_buff **pskb, IPPROTO_TCP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { htonl(0xFFFFFFFF), + { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; if (ip_nat_irc_hook) @@ -267,7 +267,7 @@ static int __init ip_conntrack_irc_init(void) hlpr = &irc_helpers[i]; hlpr->tuple.src.u.tcp.port = htons(ports[i]); hlpr->tuple.dst.protonum = IPPROTO_TCP; - hlpr->mask.src.u.tcp.port = 0xFFFF; + hlpr->mask.src.u.tcp.port = htons(0xFFFF); hlpr->mask.dst.protonum = 0xFF; hlpr->max_expected = max_dcc_channels; hlpr->timeout = dcc_timeout; diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index a566a81325b2..a1d6a89f64aa 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> +#include <linux/if_addr.h> #include <linux/in.h> #include <linux/ip.h> #include <net/route.h> @@ -47,7 +48,7 @@ static int help(struct sk_buff **pskb, struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt = (struct rtable *)(*pskb)->dst; struct in_device *in_dev; - u_int32_t mask = 0; + __be32 mask = 0; /* we're only interested in locally generated packets */ if ((*pskb)->sk == NULL) @@ -77,12 +78,12 @@ static int help(struct sk_buff **pskb, goto out; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); + exp->tuple.src.u.udp.port = htons(NMBD_PORT); exp->mask.src.ip = mask; - exp->mask.src.u.udp.port = 0xFFFF; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.src.u.udp.port = htons(0xFFFF); + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -114,7 +115,7 @@ static struct ip_conntrack_helper helper = { .src = { .u = { .udp = { - .port = 0xFFFF, + .port = __constant_htons(0xFFFF), } } }, diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 33891bb1fde4..53b6dffea6c2 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -78,8 +78,8 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb, { struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip); NFA_NEST_END(skb, nest_parms); @@ -110,7 +110,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t status = htonl((u_int32_t) ct->status); + __be32 status = htonl((u_int32_t) ct->status); NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; @@ -122,7 +122,7 @@ static inline int ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) { long timeout_l = ct->timeout.expires - jiffies; - u_int32_t timeout; + __be32 timeout; if (timeout_l < 0) timeout = 0; @@ -192,13 +192,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nfattr *nest_count = NFA_NEST(skb, type); - u_int32_t tmp; + __be32 tmp; tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp); tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp); NFA_NEST_END(skb, nest_count); @@ -215,9 +215,9 @@ nfattr_failure: static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t mark = htonl(ct->mark); + __be32 mark = htonl(ct->mark); - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark); return 0; nfattr_failure: @@ -230,8 +230,8 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + __be32 id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(__be32), &id); return 0; nfattr_failure: @@ -241,9 +241,9 @@ nfattr_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + __be32 use = htonl(atomic_read(&ct->ct_general.use)); - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + NFA_PUT(skb, CTA_USE, sizeof(__be32), &use); return 0; nfattr_failure: @@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; + if (events & IPCT_MARK + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; @@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&ip_conntrack_lock); + last = (struct ip_conntrack *)cb->args[1]; for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct ip_conntrack *)cb->args[1]; list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = tuplehash_to_ctrack(h); - if (last != NULL) { - if (ct == last) { - ip_conntrack_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -439,64 +436,29 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } +#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) + memset(&ct->counters, 0, sizeof(ct->counters)); +#endif } - if (last != NULL) { - ip_conntrack_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&ip_conntrack_lock); + if (last) + ip_conntrack_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} - -#ifdef CONFIG_IP_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack *ct = NULL; - struct ip_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { - h = (struct ip_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } -#endif static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), + [CTA_IP_V4_SRC-1] = sizeof(__be32), + [CTA_IP_V4_DST-1] = sizeof(__be32), }; static inline int @@ -513,11 +475,11 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) if (!tb[CTA_IP_V4_SRC-1]) return -EINVAL; - tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); if (!tb[CTA_IP_V4_DST-1]) return -EINVAL; - tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); DEBUGP("leaving\n"); @@ -640,8 +602,8 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, } static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), + [CTA_NAT_MINIP-1] = sizeof(__be32), + [CTA_NAT_MAXIP-1] = sizeof(__be32), }; static inline int @@ -661,12 +623,12 @@ ctnetlink_parse_nat(struct nfattr *nat, return -EINVAL; if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); if (!tb[CTA_NAT_MAXIP-1]) range->max_ip = range->min_ip; else - range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; @@ -701,11 +663,11 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) } static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) + [CTA_STATUS-1] = sizeof(__be32), + [CTA_TIMEOUT-1] = sizeof(__be32), + [CTA_MARK-1] = sizeof(__be32), + [CTA_USE-1] = sizeof(__be32), + [CTA_ID-1] = sizeof(__be32) }; static int @@ -744,7 +706,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = tuplehash_to_ctrack(h); if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); if (ct->id != id) { ip_conntrack_put(ct); return -ENOENT; @@ -778,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (msg->nfgen_family != AF_INET) return -EAFNOSUPPORT; - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_IP_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else +#ifndef CONFIG_IP_NF_CT_ACCT + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) return -ENOTSUPP; #endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; - } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) @@ -854,7 +808,7 @@ static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { unsigned long d; - unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -949,7 +903,7 @@ ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) static inline int ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) { - u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -1012,7 +966,7 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif DEBUGP("all done\n"); @@ -1035,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (!cda[CTA_TIMEOUT-1]) goto err; - ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; @@ -1052,7 +1006,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif ct->helper = ip_conntrack_helper_find_get(rtuple); @@ -1184,8 +1138,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct ip_conntrack_expect *exp) { struct ip_conntrack *master = exp->master; - u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); - u_int32_t id = htonl(exp->id); + __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); + __be32 id = htonl(exp->id); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; @@ -1196,8 +1150,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nfattr_failure; - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id); return 0; @@ -1256,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; + if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + return NOTIFY_DONE; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; @@ -1315,8 +1272,8 @@ out: } static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) + [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32), + [CTA_EXPECT_ID-1] = sizeof(__be32) }; static int @@ -1364,7 +1321,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; @@ -1418,8 +1375,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = - *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = + *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index f891308b5e4c..36f2b5e5d80a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -12,7 +12,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_generic_timeout = 600*HZ; +unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 21ee124c0463..5fe026f467d3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,15 +1,15 @@ /* - * ip_conntrack_proto_gre.c - Version 3.0 + * ip_conntrack_proto_gre.c - Version 3.0 * * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/timer.h> @@ -38,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) -#include <linux/netfilter_ipv4/listhelp.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> @@ -63,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define DEBUGP(x, args...) #define DUMP_TUPLE_GRE(x) #endif - + /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -83,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (km) - key = km->tuple.src.u.gre.key; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t)) { + key = km->tuple.src.u.gre.key; + break; + } + } read_unlock_bh(&ip_ct_gre_lock); - + DEBUGP("lookup src key 0x%x up key for ", key); DUMP_TUPLE_GRE(t); @@ -100,28 +100,25 @@ int ip_ct_gre_keymap_add(struct ip_conntrack *ct, struct ip_conntrack_tuple *t, int reply) { - struct ip_ct_gre_keymap **exist_km, *km, *old; + struct ip_ct_gre_keymap **exist_km, *km; if (!ct->helper || strcmp(ct->helper->name, "pptp")) { DEBUGP("refusing to add GRE keymap to non-pptp session\n"); return -1; } - if (!reply) + if (!reply) exist_km = &ct->help.ct_pptp_info.keymap_orig; else exist_km = &ct->help.ct_pptp_info.keymap_reply; if (*exist_km) { /* check whether it's a retransmission */ - old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (old == *exist_km) { - DEBUGP("retransmission\n"); - return 0; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *exist_km) + return 0; } - - DEBUGP("trying to override keymap_%s for ct %p\n", + DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; } @@ -137,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, DUMP_TUPLE_GRE(&km->tuple); write_lock_bh(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); + list_add_tail(&km->list, &gre_keymap_list); write_unlock_bh(&ip_ct_gre_lock); return 0; @@ -155,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) write_lock_bh(&ip_ct_gre_lock); if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", + DEBUGP("removing %p from list\n", ct->help.ct_pptp_info.keymap_orig); list_del(&ct->help.ct_pptp_info.keymap_orig->list); kfree(ct->help.ct_pptp_info.keymap_orig); @@ -223,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, static int gre_print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); } @@ -253,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct, } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); - + return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static int gre_new(struct ip_conntrack *ct, const struct sk_buff *skb) -{ +{ DEBUGP(": "); DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); @@ -286,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct) } /* protocol helper struct */ -static struct ip_conntrack_protocol gre = { +static struct ip_conntrack_protocol gre = { .proto = IPPROTO_GRE, - .name = "gre", + .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, @@ -326,7 +323,7 @@ void ip_ct_proto_gre_fini(void) } write_unlock_bh(&ip_ct_gre_lock); - ip_conntrack_protocol_unregister(&gre); + ip_conntrack_protocol_unregister(&gre); } EXPORT_SYMBOL(ip_ct_gre_keymap_add); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 23f1c504586d..295b6fa340db 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -21,7 +21,7 @@ #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_icmp_timeout = 30*HZ; +unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk @@ -261,7 +261,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, static int icmp_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16), &t->src.u.icmp.id); NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); @@ -287,7 +287,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], tuple->dst.u.icmp.code = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 0416073c5600..2443322e4128 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; -static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS; +static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; static const unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ @@ -210,7 +210,7 @@ static int sctp_print_conntrack(struct seq_file *s, for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \ offset < skb->len && \ (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ - offset += (htons(sch->length) + 3) & ~3, count++) + offset += (ntohs(sch->length) + 3) & ~3, count++) /* Some validity checks to make sure the chunks are fine */ static int do_basic_checks(struct ip_conntrack *conntrack, @@ -254,7 +254,7 @@ static int do_basic_checks(struct ip_conntrack *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index c5c2ce5cdeb8..06e4e8a6dd9f 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -19,7 +19,6 @@ * version 2.2 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> @@ -49,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int ip_ct_tcp_be_liberal = 0; +int ip_ct_tcp_be_liberal __read_mostly = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose = 3; +int ip_ct_tcp_loose __read_mostly = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int ip_ct_tcp_max_retrans = 3; +int ip_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -84,19 +83,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int ip_ct_tcp_timeout_established = 5 DAYS; -unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ @@ -520,8 +519,8 @@ static void tcp_sack(const struct sk_buff *skb, /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)ptr == - __constant_ntohl((TCPOPT_NOP << 24) + && *(__be32 *)ptr == + __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) @@ -552,7 +551,7 @@ static void tcp_sack(const struct sk_buff *skb, for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { - tmp = ntohl(*((u_int32_t *)(ptr+i)+1)); + tmp = ntohl(*((__be32 *)(ptr+i)+1)); if (after(tmp, *sack)) *sack = tmp; @@ -732,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } @@ -866,8 +867,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because it is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 9b2c16b4d2ff..d0e8a16970ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -18,8 +18,8 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_udp_timeout = 30*HZ; -unsigned int ip_ct_udp_timeout_stream = 180*HZ; +unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ; +unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index fc87ce0da40d..f4f75995a9e4 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/skbuff.h> @@ -194,7 +193,7 @@ static int skp_digits_len(const char *dptr, const char *limit, int *shift) /* Simple ipaddr parser.. */ static int parse_ipaddr(const char *cp, const char **endp, - u_int32_t *ipaddr, const char *limit) + __be32 *ipaddr, const char *limit) { unsigned long int val; int i, digit = 0; @@ -228,7 +227,7 @@ static int parse_ipaddr(const char *cp, const char **endp, static int epaddr_len(const char *dptr, const char *limit, int *shift) { const char *aux = dptr; - u_int32_t ip; + __be32 ip; if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) { DEBUGP("ip: %s parse failed.!\n", dptr); @@ -303,7 +302,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen, static int set_expected_rtp(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, - u_int32_t ipaddr, u_int16_t port, + __be32 ipaddr, u_int16_t port, const char *dptr) { struct ip_conntrack_expect *exp; @@ -320,10 +319,10 @@ static int set_expected_rtp(struct sk_buff **pskb, exp->tuple.dst.u.udp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -350,7 +349,7 @@ static int sip_help(struct sk_buff **pskb, const char *dptr; int ret = NF_ACCEPT; int matchoff, matchlen; - u_int32_t ipaddr; + __be32 ipaddr; u_int16_t port; /* No Data ? */ @@ -440,9 +439,9 @@ static int __init init(void) sip[i].tuple.dst.protonum = IPPROTO_UDP; sip[i].tuple.src.u.udp.port = htons(ports[i]); - sip[i].mask.src.u.udp.port = 0xFFFF; + sip[i].mask.src.u.udp.port = htons(0xFFFF); sip[i].mask.dst.protonum = 0xFF; - sip[i].max_expected = 1; + sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ sip[i].me = THIS_MODULE; sip[i].help = sip_help; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 88445aac3f28..02135756562e 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> @@ -36,7 +35,6 @@ #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -535,6 +533,8 @@ static struct nf_hook_ops ip_conntrack_ops[] = { /* Sysctl support */ +int ip_conntrack_checksum __read_mostly = 1; + #ifdef CONFIG_SYSCTL /* From ip_conntrack_core.c */ @@ -562,15 +562,13 @@ extern unsigned int ip_ct_udp_timeout_stream; /* From ip_conntrack_proto_icmp.c */ extern unsigned int ip_ct_icmp_timeout; -/* From ip_conntrack_proto_icmp.c */ +/* From ip_conntrack_proto_generic.c */ extern unsigned int ip_ct_generic_timeout; /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -int ip_conntrack_checksum = 1; - static struct ctl_table_header *ip_ct_sysctl_header; static ctl_table ip_ct_sysctl_table[] = { diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index 7e33d3bed5e3..fe0b634dd377 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -70,10 +70,10 @@ static int tftp_help(struct sk_buff **pskb, return NF_DROP; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->mask.src.ip = 0xffffffff; + exp->mask.src.ip = htonl(0xffffffff); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xffffffff; - exp->mask.dst.u.udp.port = 0xffff; + exp->mask.dst.ip = htonl(0xffffffff); + exp->mask.dst.u.udp.port = htons(0xffff); exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; exp->flags = 0; @@ -129,7 +129,7 @@ static int __init ip_conntrack_tftp_init(void) tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); tftp[i].mask.dst.protonum = 0xFF; - tftp[i].mask.src.u.udp.port = 0xFFFF; + tftp[i].mask.src.u.udp.port = htons(0xFFFF); tftp[i].max_expected = 1; tftp[i].timeout = 5 * 60; /* 5 minutes */ tftp[i].me = THIS_MODULE; diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1741d555ad0d..4b6260a97408 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,9 +22,6 @@ #include <linux/udp.h> #include <linux/jhash.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> @@ -33,7 +30,6 @@ #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -86,7 +82,7 @@ static inline unsigned int hash_by_src(const struct ip_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words(tuple->src.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all, tuple->dst.protonum, 0) % ip_nat_htable_size; } @@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) write_unlock_bh(&ip_nat_lock); } -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} -EXPORT_SYMBOL(ip_nat_cheat_check); - /* Is this tuple already taken? (not by us) */ int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, @@ -206,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - u_int32_t *var_ipp; + __be32 *var_ipp; /* Host order */ u_int32_t minip, maxip, j; @@ -233,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * like this), even across reboots. */ minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); - j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0); + j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0); *var_ipp = htonl(minip + j % (maxip - minip + 1)); } @@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); + iph->check = nf_csum_update(~iph->saddr, target->src.ip, + iph->check); iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); + iph->check = nf_csum_update(~iph->daddr, target->dst.ip, + iph->check); iph->daddr = target->dst.ip; } return 1; @@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; + enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; @@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, !manip)) return 0; - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + } /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ @@ -550,9 +534,9 @@ int ip_nat_port_range_to_nfattr(struct sk_buff *skb, const struct ip_nat_range *range) { - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), &range->max.tcp.port); return 0; @@ -571,7 +555,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) if (tb[CTA_PROTONAT_PORT_MIN-1]) { ret = 1; range->min.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); } if (!tb[CTA_PROTONAT_PORT_MAX-1]) { @@ -580,7 +564,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) } else { ret = 1; range->max.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); } return ret; diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index 3328fc5c5f50..a71c233d8112 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -34,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper"); static int mangle_rfc959_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -57,7 +57,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_eprt_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ mangle_eprt_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_epsv_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -98,7 +98,7 @@ mangle_epsv_packet(struct sk_buff **pskb, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t, +static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, unsigned int, unsigned int, struct ip_conntrack *, @@ -120,7 +120,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb, struct ip_conntrack_expect *exp, u32 *seq) { - u_int32_t newip; + __be32 newip; u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct ip_conntrack *ct = exp->master; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 5d506e0564d5..3bf858480558 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -15,7 +15,6 @@ * - make ip_nat_resize_packet more generic (TCP and UDP) * - add ip_nat_mangle_udp_packet */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/types.h> @@ -28,16 +27,12 @@ #include <net/tcp.h> #include <net/udp.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_protocol.h> #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -166,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int datalen; + int oldlen, datalen; if (!skb_make_writable(pskb, (*pskb)->len)) return 0; @@ -181,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + tcph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ htons(0xFFFF), + htons(datalen), + tcph->check, 1); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -222,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; + int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -239,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - } - + if (!udph->check) + udph->check = -1; + } else + udph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ htons(0xFFFF), + htons(datalen), + udph->check, 1); return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -268,37 +283,38 @@ sack_adjust(struct sk_buff *skb, struct ip_nat_seq *natseq) { while (sackoff < sackend) { - struct tcp_sack_block *sack; - u_int32_t new_start_seq, new_end_seq; + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; if (after(ntohl(sack->start_seq) - natseq->offset_before, natseq->correction_pos)) - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_after; + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); else - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_before; - new_start_seq = htonl(new_start_seq); + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); if (after(ntohl(sack->end_seq) - natseq->offset_before, natseq->correction_pos)) - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_after; + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); else - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_before; - new_end_seq = htonl(new_end_seq); + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); + tcph->check = nf_proto_csum_update(skb, + ~sack->start_seq, + new_start_seq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(skb, + ~sack->end_seq, + new_end_seq, + tcph->check, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -357,7 +373,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb, enum ip_conntrack_info ctinfo) { struct tcphdr *tcph; - int dir, newseq, newack; + int dir; + __be32 newseq, newack; struct ip_nat_seq *this_way, *other_way; dir = CTINFO2DIR(ctinfo); @@ -370,22 +387,20 @@ ip_nat_seq_adjust(struct sk_buff **pskb, tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = ntohl(tcph->seq) + this_way->offset_after; + newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else - newseq = ntohl(tcph->seq) + this_way->offset_before; - newseq = htonl(newseq); + newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = ntohl(tcph->ack_seq) - other_way->offset_after; + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); else - newack = ntohl(tcph->ack_seq) - other_way->offset_before; - newack = htonl(newack); + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, + tcph->check, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c index 419b878fb467..4a7d34466ee2 100644 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ b/net/ipv4/netfilter/ip_nat_helper_h323.c @@ -32,13 +32,13 @@ /****************************************************************************/ static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, - unsigned int addroff, u_int32_t ip, u_int16_t port) + unsigned int addroff, __be32 ip, u_int16_t port) { enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo); struct { - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; } __attribute__ ((__packed__)) buf; struct tcphdr _tcph, *th; @@ -86,7 +86,7 @@ static int set_addr(struct sk_buff **pskb, static int set_h225_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port); } @@ -95,7 +95,7 @@ static int set_h225_addr(struct sk_buff **pskb, static int set_h245_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->unicastAddress.iPAddress.network, ip, port); @@ -110,7 +110,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -164,7 +164,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = port; - u_int32_t ip; + __be32 ip; /* Set expectations for NAT */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index f3977726ff09..329fdcd7d702 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -32,10 +32,9 @@ * 2005-06-10 - Version 3.0 * - kernel >= 2.6.11 version, * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * + * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -52,7 +51,7 @@ #define IP_NAT_PPTP_VERSION "3.0" -#define REQ_CID(req, off) (*(u_int16_t *)((char *)(req) + (off))) +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); @@ -86,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = - htons(master->nat.help.nat_pptp_info.pns_call_id); + t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = - htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; t.dst.protonum = IPPROTO_GRE; } @@ -150,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_callid; + u_int16_t msg; + __be16 new_callid; unsigned int cid_off; - new_callid = htons(ct_pptp_info->pns_call_id); - + new_callid = ct_pptp_info->pns_call_id; + switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; } /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass @@ -213,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } -static int +static void pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = - &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = - &expect_orig->master->nat.help.nat_pptp_info; - struct ip_conntrack *ct = expect_orig->master; - - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - /* alter expectation for PNS->PAC direction */ - invert_tuplepr(&inv_t, &expect_orig->tuple); - expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); - expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); - - if (!ip_conntrack_expect_related(expect_orig)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_orig)\n"); - return 1; - } /* alter expectation for PAC->PNS direction */ - invert_tuplepr(&inv_t, &expect_reply->tuple); - expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); - expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); - - if (!ip_conntrack_expect_related(expect_reply)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_reply)\n"); - ip_conntrack_unexpect_related(expect_orig); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { - DEBUGP("can't register original keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { - DEBUGP("can't register reply keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - ip_ct_gre_keymap_destroy(ct); - return 1; - } - - return 0; } /* inbound packets == from PAC to PNS */ @@ -298,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0, new_pcid; - unsigned int pcid_off, cid_off = 0; + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; - new_pcid = htons(nat_pptp_info->pns_call_id); + new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); - cid_off = offsetof(union pptp_ctrl_union, ocack.callID); break; case PPTP_IN_CALL_CONNECT: pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); @@ -325,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0]); /* fall through */ @@ -352,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, sizeof(new_pcid), (char *)&new_pcid, sizeof(new_pcid)) == 0) return NF_DROP; - - if (new_cid) { - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid)); - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_cid), (char *)&new_cid, - sizeof(new_cid)) == 0) - return NF_DROP; - } return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 96ceabaec402..bf91f9312b3c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -6,10 +6,10 @@ * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ip_nat.h> @@ -61,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, } /* generate unique tuple ... */ -static int +static int gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, enum ip_nat_manip_type maniptype, const struct ip_conntrack *conntrack) { static u_int16_t key; - u_int16_t *keyptr; + __be16 *keyptr; unsigned int min, i, range_size; if (maniptype == IP_NAT_MANIP_SRC) @@ -85,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + DEBUGP("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -118,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *) greh; - /* we only have destination manip of a packet, since 'source key' + /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype == IP_NAT_MANIP_DST) { /* key manipulation is always dest */ @@ -130,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), + *(gre_csum(greh)) = + nf_proto_csum_update(*pskb, + ~*(gre_key(greh)), tuple->dst.u.gre.key, - *(gre_csum(greh))); + *(gre_csum(greh)), 0); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; @@ -152,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb, } /* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", +static struct ip_nat_protocol gre = { + .name = "GRE", .protonum = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, @@ -164,7 +164,7 @@ static struct ip_nat_protocol gre = { .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; - + int __init ip_nat_proto_gre_init(void) { return ip_nat_protocol_register(&gre); diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 31a3f4ccb99c..3f6efc13ac74 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - - hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum); + hdr->checksum = nf_proto_csum_update(*pskb, + hdr->un.echo.id ^ htons(0xFFFF), + tuple->src.u.icmp.id, + hdr->checksum, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a3d14079eba6..12deb13b93b1 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -24,7 +24,7 @@ tcp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.tcp.port; @@ -42,7 +42,7 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -93,8 +93,8 @@ tcp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport, oldport; + __be32 oldip, newip; + __be16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ /* this could be a inner header returned in icmp packet; in such @@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport, + hdr->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index ec6053fdc867..4bbec7730d18 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -24,7 +24,7 @@ udp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.udp.port; @@ -42,7 +42,7 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -91,8 +91,8 @@ udp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport; + __be32 oldip, newip; + __be16 *portptr, newport; if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; @@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); + + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, + hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, + *portptr ^ htons(0xFFFF), newport, + hdr->check, 0); + if (!hdr->check) + hdr->check = -1; + } *portptr = newport; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aba926c1cb0..a176aa3031e0 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -19,14 +19,10 @@ #include <net/route.h> #include <linux/bitops.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -124,7 +119,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(u32 dstip, u32 srcip) +static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; @@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -213,7 +205,7 @@ alloc_null_binding(struct ip_conntrack *conntrack, per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). Use reply in case it's already been mangled (eg local packet). */ - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); @@ -230,7 +222,7 @@ alloc_null_binding_confirmed(struct ip_conntrack *conntrack, struct ip_nat_info *info, unsigned int hooknum) { - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); @@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c index 6ffba63adca2..71fc2730a007 100644 --- a/net/ipv4/netfilter/ip_nat_sip.c +++ b/net/ipv4/netfilter/ip_nat_sip.c @@ -60,8 +60,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; unsigned int bufflen, dataoff; - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); @@ -159,7 +159,7 @@ static int mangle_content_len(struct sk_buff **pskb, static unsigned int mangle_sdp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct ip_conntrack *ct, - u_int32_t newip, u_int16_t port, + __be32 newip, u_int16_t port, const char *dptr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -195,7 +195,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, { struct ip_conntrack *ct = exp->master; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - u_int32_t newip; + __be32 newip; u_int16_t port; DEBUGP("ip_nat_sdp():\n"); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index d20d557f915a..168f45fa1898 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -43,7 +43,6 @@ * 2000-08-06: Convert to new helper API (Harald Welte). * */ -#include <linux/config.h> #include <linux/in.h> #include <linux/module.h> #include <linux/types.h> @@ -1212,7 +1211,7 @@ static int snmp_translate(struct ip_conntrack *ct, struct sk_buff **pskb) { struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); int dir = CTINFO2DIR(ctinfo); @@ -1256,9 +1255,9 @@ static int help(struct sk_buff **pskb, struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ - if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) return NF_ACCEPT; - if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; /* No NAT? */ diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 67e676783da9..021395b67463 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -18,7 +18,6 @@ * - now capable of multiple expectations for one master * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/icmp.h> #include <linux/ip.h> @@ -31,9 +30,6 @@ #include <net/checksum.h> #include <linux/spinlock.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_rule.h> #include <linux/netfilter_ipv4/ip_nat_protocol.h> @@ -41,7 +37,6 @@ #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -111,11 +106,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_HW) - if (skb_checksum_help(*pskb, (out == NULL))) - return NF_DROP; - ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to @@ -146,8 +136,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(ct, ctinfo, + hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; @@ -201,7 +191,7 @@ ip_nat_in(unsigned int hooknum, int (*okfn)(struct sk_buff *)) { unsigned int ret; - u_int32_t daddr = (*pskb)->nh.iph->daddr; + __be32 daddr = (*pskb)->nh.iph->daddr; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index b93f0494362f..7edad790478a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -52,15 +52,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); @@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } @@ -457,11 +457,19 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) if (entry->info->indev) if (entry->info->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) if (entry->info->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } @@ -507,7 +515,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cee3397ec277..78a44b01c035 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -14,7 +14,6 @@ * 08 Oct 2005 Harald Welte <lafore@netfilter.org> * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables" */ -#include <linux/config.h> #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> @@ -181,8 +180,7 @@ ipt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", (char *)targinfo); @@ -218,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata) + struct ipt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -231,7 +228,7 @@ ipt_do_table(struct sk_buff **pskb, const char *indev, *outdev; void *table_base; struct ipt_entry *e, *back; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* Initialization */ ip = (*pskb)->nh.iph; @@ -248,6 +245,7 @@ ipt_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom @@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto cleanup_matches; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset) return delta; } -struct compat_ipt_standard_target +static void compat_standard_from_user(void *dst, void *src) { - struct compat_xt_entry_target target; - compat_int_t verdict; -}; + int v = *(compat_int_t *)src; -struct compat_ipt_standard -{ - struct compat_ipt_entry entry; - struct compat_ipt_standard_target target; -}; - -#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) -#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) -#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) + if (v > 0) + v += compat_calc_jump(v); + memcpy(dst, &v, sizeof(v)); +} -static int compat_ipt_standard_fn(void *target, - void **dstptr, int *size, int convert) +static int compat_standard_to_user(void __user *dst, void *src) { - struct compat_ipt_standard_target compat_st, *pcompat_st; - struct ipt_standard_target st, *pst; - int ret; + compat_int_t cv = *(int *)src; - ret = 0; - switch (convert) { - case COMPAT_TO_USER: - pst = target; - memcpy(&compat_st.target, &pst->target, - sizeof(compat_st.target)); - compat_st.verdict = pst->verdict; - if (compat_st.verdict > 0) - compat_st.verdict -= - compat_calc_jump(compat_st.verdict); - compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; - if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) - ret = -EFAULT; - *size -= IPT_ST_OFFSET; - *dstptr += IPT_ST_COMPAT_LEN; - break; - case COMPAT_FROM_USER: - pcompat_st = target; - memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); - st.verdict = pcompat_st->verdict; - if (st.verdict > 0) - st.verdict += compat_calc_jump(st.verdict); - st.target.u.user.target_size = IPT_ST_LEN; - memcpy(*dstptr, &st, IPT_ST_LEN); - *size += IPT_ST_OFFSET; - *dstptr += IPT_ST_LEN; - break; - case COMPAT_CALC_SIZE: - *size += IPT_ST_OFFSET; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + if (cv > 0) + cv -= compat_calc_jump(cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static inline int compat_calc_match(struct ipt_entry_match *m, int * size) { - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(m->u.kernel.match); return 0; } @@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); t = ipt_get_target(e); - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1422,17 +1366,13 @@ struct compat_ipt_replace { static inline int compat_copy_match_to_user(struct ipt_entry_match *m, void __user **dstptr, compat_uint_t *size) { - if (m->u.kernel.match->compat) - return m->u.kernel.match->compat(m, dstptr, size, - COMPAT_TO_USER); - else - return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); + return xt_compat_match_to_user(m, dstptr, size); } static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, compat_uint_t *size) { - struct ipt_entry_target __user *t; + struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; @@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, if (ret) goto out; t = ipt_get_target(e); - if (t->u.kernel.target->compat) - ret = t->u.kernel.target->compat(t, dstptr, size, - COMPAT_TO_USER); - else - ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); + ret = xt_compat_target_to_user(t, dstptr, size); if (ret) goto out; ret = -EFAULT; @@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m, return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(match); (*i)++; return 0; @@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto out; + goto cleanup_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; - goto out; + goto cleanup_matches; } t->u.kernel.target = target; - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(target); *size += off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, (*i)++; return 0; + out: + module_put(t->u.kernel.target->me); +cleanup_matches: IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) + const struct ipt_ip *ip, unsigned int hookmask, int *i) { struct ipt_entry_match *dm; struct ipt_match *match; @@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, dm = (struct ipt_entry_match *)*dstptr; match = m->u.kernel.match; - if (match->compat) - match->compat(m, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); + xt_compat_match_from_user(m, dstptr, size); ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), name, hookmask, ip->proto, ip->invflags & IPT_INV_PROTO); if (ret) - return ret; + goto err; if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - dm->u.match_size - sizeof(*dm), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); - return -EINVAL; + ret = -EINVAL; + goto err; } + (*i)++; return 0; + +err: + module_put(m->u.kernel.match->me); + return ret; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct ipt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int ret, h, j; ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); + j = 0; *dstptr += sizeof(struct compat_ipt_entry); ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, - name, &de->ip, de->comefrom); + name, &de->ip, de->comefrom, &j); if (ret) - goto out; + goto cleanup_matches; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; - if (target->compat) - target->compat(t, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); + xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_IP_NUMHOOKS; h++) { @@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, name, e->comefrom, e->ip.proto, e->ip.invflags & IPT_INV_PROTO); if (ret) - goto out; + goto err; ret = -EINVAL; if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, *size)) - goto out; + goto err; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, - t->data, t->u.target_size - sizeof(*t), - de->comefrom)) { + t->data, de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); - goto out; + goto err; } ret = 0; -out: + return ret; + +err: + module_put(t->u.kernel.target->me); +cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } @@ -1761,7 +1697,7 @@ translate_compat_table(const char *name, goto free_newinfo; /* And one copy for every other CPU */ - for_each_cpu(i) + for_each_possible_cpu(i) if (newinfo->entries[i] && newinfo->entries[i] != entry1) memcpy(newinfo->entries[i], entry1, newinfo->size); @@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) return ret; } +static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); + static int compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { @@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = compat_get_entries(user, len); break; default: - duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); - ret = -EINVAL; + ret = do_ipt_get_ctl(sk, cmd, user, len); } return ret; } @@ -2113,7 +2050,8 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } @@ -2184,7 +2122,6 @@ icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; @@ -2199,7 +2136,9 @@ static struct ipt_target ipt_standard_target = { .targetsize = sizeof(int), .family = AF_INET, #ifdef CONFIG_COMPAT - .compat = &compat_ipt_standard_fn, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif }; @@ -2239,22 +2178,39 @@ static int __init ip_tables_init(void) { int ret; - xt_proto_init(AF_INET); + ret = xt_proto_init(AF_INET); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ipt_standard_target); - xt_register_target(&ipt_error_target); - xt_register_match(&icmp_matchstruct); + ret = xt_register_target(&ipt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ipt_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err5; printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp_matchstruct); +err4: + xt_unregister_target(&ipt_error_target); +err3: + xt_unregister_target(&ipt_standard_target); +err2: + xt_proto_fini(AF_INET); +err1: + return ret; } static void __exit ip_tables_fini(void) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index dbc83c5d7aa6..7a29d6e7baa7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -10,7 +10,6 @@ * */ #include <linux/module.h> -#include <linux/config.h> #include <linux/proc_fs.h> #include <linux/jhash.h> #include <linux/bitops.h> @@ -53,7 +52,7 @@ struct clusterip_config { atomic_t entries; /* number of entries/rules * referencing us */ - u_int32_t clusterip; /* the IP address */ + __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ @@ -120,7 +119,7 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(u_int32_t clusterip) +__clusterip_config_find(__be32 clusterip) { struct list_head *pos; @@ -137,7 +136,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; @@ -167,17 +166,16 @@ clusterip_config_init_nodelist(struct clusterip_config *c, } static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, +clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; char buffer[16]; - c = kmalloc(sizeof(*c), GFP_ATOMIC); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return NULL; - memset(c, 0, sizeof(*c)); c->dev = dev; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); @@ -304,8 +302,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; @@ -375,7 +372,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -391,7 +387,7 @@ checkentry(const char *tablename, return 0; } - if (e->ip.dmsk.s_addr != 0xffffffff + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return 0; @@ -452,8 +448,7 @@ checkentry(const char *tablename, } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +static void destroy(const struct xt_target *target, void *targinfo) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -481,9 +476,9 @@ static struct ipt_target clusterip_tgt = { /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; - u_int32_t src_ip; + __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; - u_int32_t dst_ip; + __be32 dst_ip; } __attribute__ ((packed)); #ifdef CLUSTERIP_DEBUG diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index c8e971288dfe..000000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null @@ -1,96 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_DSCP.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo, - void *userinfo) -{ - const struct ipt_DSCP_info *dinfo = targinfo; - u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - - if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) - | sh_dscp; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^ 0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; - - if ((dscp > IPT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - return 1; -} - -static struct ipt_target ipt_dscp_reg = { - .name = "DSCP", - .target = target, - .targetsize = sizeof(struct ipt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_target(&ipt_dscp_reg); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_target(&ipt_dscp_reg); -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4adf5c9d34f5..12a818a2462f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module"); static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { - if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) - != (einfo->ip_ect & IPT_ECN_IP_MASK)) { - u_int16_t diffs[2]; + struct iphdr *iph = (*pskb)->nh.iph; + __be16 oldtos; + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; - (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, + iph->check); } return 1; } /* Return 0 if there was an error. */ static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t diffs[2]; + __be16 oldval; /* Not enought header? */ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, inward)) - return 0; - - diffs[0] = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)tcph)[6]; - diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) - tcph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph->check^0xFFFF)); + tcph->check = nf_proto_csum_update((*pskb), + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], + tcph->check, 0); return 1; } @@ -95,8 +85,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_ECN_info *einfo = targinfo; @@ -106,7 +95,7 @@ target(struct sk_buff **pskb, if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo, (out == NULL))) + if (!set_ect_tcp(pskb, einfo)) return NF_DROP; return IPT_CONTINUE; @@ -117,7 +106,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b98f7b08b084..7dc820df8bc5 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 8b3e7f99b861..3dbfcfac8a84 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/inetdevice.h> #include <linux/ip.h> @@ -43,7 +42,6 @@ masquerade_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -65,15 +63,14 @@ masquerade_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; const struct ip_nat_multi_range_compat *mr; struct ip_nat_range newrange; struct rtable *rt; - u_int32_t newsrc; + __be32 newsrc; IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 2fcf1075b027..58a88f227108 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -10,7 +10,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/ip.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -34,7 +33,6 @@ check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -56,12 +54,11 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t new_ip, netmask; + __be32 new_ip, netmask; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f290463232de..c0dcfe9d610c 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -36,7 +36,6 @@ redirect_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -58,12 +57,11 @@ redirect_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t newdst; + __be32 newdst; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 431a3ce6f7b7..fd0c05efed8a 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> @@ -91,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); @@ -104,8 +104,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; struct rtable *rt; - u_int16_t tmp_port; - u_int32_t tmp_addr; + __be16 tmp_port; + __be32 tmp_addr; int needs_ack; int hh_len; @@ -185,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->urg_ptr = 0; /* Adjust TCP checksum */ + nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, @@ -227,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_reject_info *reject = targinfo; @@ -276,7 +276,6 @@ static int check(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 7169b09b5a67..b38b13328d73 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -52,7 +52,6 @@ same_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { unsigned int count, countess, rangeip, index = 0; @@ -116,8 +115,7 @@ same_check(const char *tablename, } static void -same_destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +same_destroy(const struct xt_target *target, void *targinfo) { struct ipt_same_info *mr = targinfo; @@ -133,12 +131,12 @@ same_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t tmpip, aindex, new_ip; + u_int32_t tmpip, aindex; + __be32 new_ip; const struct ipt_same_info *same = targinfo; struct ip_nat_range newrange; const struct ip_conntrack_tuple *t; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ef2fe5b3f0d8..108b6b76311f 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -21,26 +21,14 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS modification module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static u_int16_t -cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} - static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; - else return opt[offset+1]; + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; } static unsigned int @@ -49,26 +37,21 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; struct iphdr *iph; - u_int16_t tcplen, newtotlen, oldval, newmss; + u_int16_t tcplen, newmss; + __be16 newtotlen, oldval; unsigned int i; u_int8_t *opt; if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, out == NULL)) - return NF_DROP; - iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; - tcph = (void *)iph + iph->ihl*4; /* Since it passed flags test in tcp match, we know it is is @@ -84,54 +67,41 @@ ipt_tcpmss_target(struct sk_buff **pskb, return NF_DROP; } - if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if(!(*pskb)->dst) { + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { + if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + + sizeof(struct tcphdr)) { if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); + printk(KERN_ERR "ipt_tcpmss_target: " + "unknown or invalid path-MTU (%d)\n", + dst_mtu((*pskb)->dst)); return NF_DROP; /* or IPT_CONTINUE ?? */ } - if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); + newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - + sizeof(struct tcphdr); } else newmss = tcpmssinfo->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ - if ((opt[i] == TCPOPT_MSS) && - ((tcph->doff*4 - i) >= TCPOLEN_MSS) && - (opt[i+1] == TCPOLEN_MSS)) { + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && + opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - (oldmss <= newmss)) - return IPT_CONTINUE; + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + oldmss <= newmss) + return IPT_CONTINUE; opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = cheat_check(htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check); - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu changed TCP MSS option" - " (from %u to %u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - oldmss, newmss); - goto retmodified; + tcph->check = nf_proto_csum_update(*pskb, + htons(oldmss)^htons(0xFFFF), + htons(newmss), + tcph->check, 0); + return IPT_CONTINUE; } } @@ -143,13 +113,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target:" - " unable to allocate larger skb\n"); + if (!newskb) return NF_DROP; - } - kfree_skb(*pskb); *pskb = newskb; iph = (*pskb)->nh.iph; @@ -161,36 +126,29 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), tcph->check); - tcplen += TCPOLEN_MSS; - + tcph->check = nf_proto_csum_update(*pskb, + htons(tcplen) ^ htons(0xFFFF), + htons(tcplen + TCPOLEN_MSS), + tcph->check, 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt), + tcph->check, 0); - oldval = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = cheat_check(oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], + tcph->check, 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = cheat_check(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); + iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF), + newtotlen, iph->check); iph->tot_len = newtotlen; - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - newmss); - - retmodified: return IPT_CONTINUE; } @@ -200,9 +158,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m) { const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - if (strcmp(m->u.kernel.match->name, "tcp") == 0 - && (tcpinfo->flg_cmp & TH_SYN) - && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TH_SYN && + !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) return 1; return 0; @@ -214,17 +172,17 @@ ipt_tcpmss_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; const struct ipt_entry *e = e_void; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - ((hook_mask & ~((1 << NF_IP_FORWARD) - | (1 << NF_IP_LOCAL_OUT) - | (1 << NF_IP_POST_ROUTING))) != 0)) { - printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP_FORWARD) | + (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING))) != 0) { + printk("TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); return 0; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 1c7a5ca399b3..6b8b14ccc3d3 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -26,27 +26,20 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; + struct iphdr *iph = (*pskb)->nh.iph; + __be16 oldtos; - if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { - u_int16_t diffs[2]; - + if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos - = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) - | tosinfo->tos; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, + iph->check); } return IPT_CONTINUE; } @@ -56,7 +49,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index f48892ae0be5..ac9517d62af0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -23,11 +23,10 @@ static unsigned int ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; - u_int16_t diffs[2]; int new_ttl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF), + htons(new_ttl << 8), + iph->check); iph->ttl = new_ttl; - diffs[1] = htons(((unsigned)iph->ttl) << 8); - iph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - iph->check^0xFFFF)); } return IPT_CONTINUE; @@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c84cc03389d8..2b104ea54f48 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -47,7 +47,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -116,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum) del_timer(&ub->timer); } + if (!ub->skb) { + DEBUGP("ipt_ULOG: ulog_send: nothing to send\n"); + return; + } + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; @@ -304,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; @@ -342,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 893dae210b04..7b60eb74788b 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("iptables addrtype match"); -static inline int match_type(u_int32_t addr, u_int16_t mask) +static inline int match_type(__be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_addr_type(addr))); } diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 2927135873d7..1798f86bc534 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -74,7 +74,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_ah *ahinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 47177591aeb6..000000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null @@ -1,54 +0,0 @@ -/* IP tables module for matching the value of the IPv4 DSCP field - * - * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_dscp.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_dscp_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; -} - -static struct ipt_match dscp_match = { - .name = "dscp", - .match = match, - .matchsize = sizeof(struct ipt_dscp_info), - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_match(&dscp_match); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_match(&dscp_match); - -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b28250414933..dafbdec0efc0 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 92980ab8ce48..33ccdbf8e794 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -50,11 +50,11 @@ static struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { - u_int32_t src_ip; - u_int32_t dst_ip; + __be32 src_ip; + __be32 dst_ip; /* ports have to be consecutive !!! */ - u_int16_t src_port; - u_int16_t dst_port; + __be16 src_port; + __be16 dst_port; }; struct dsthash_ent { @@ -106,8 +106,10 @@ static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) static inline u_int32_t hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst) { - return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), - dst->src_ip, ht->rnd) % ht->cfg.size); + return (jhash_3words((__force u32)dst->dst_ip, + ((__force u32)dst->dst_port<<16 | + (__force u32)dst->src_port), + (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size); } static inline struct dsthash_ent * @@ -406,7 +408,7 @@ hashlimit_match(const struct sk_buff *skb, dst.src_ip = skb->nh.iph->saddr; if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - u_int16_t _ports[2], *ports; + __be16 _ports[2], *ports; switch (skb->nh.iph->protocol) { case IPPROTO_TCP: @@ -454,15 +456,12 @@ hashlimit_match(const struct sk_buff *skb, dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - - spin_unlock_bh(&hinfo->lock); - return 1; + } else { + /* update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now); } - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - - rateinfo_recalc(dh, now); if (dh->rateinfo.credit >= dh->rateinfo.cost) { /* We're underlimit. */ dh->rateinfo.credit -= dh->rateinfo.cost; @@ -481,7 +480,6 @@ hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct ipt_hashlimit_info *r = matchinfo; @@ -508,6 +506,9 @@ hashlimit_checkentry(const char *tablename, if (!r->cfg.expire) return 0; + if (r->name[sizeof(r->name) - 1] != '\0') + return 0; + /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before ip_tables.c grabs ipt_mutex. * We also cannot grab the hashtable spinlock, since htable_create will @@ -529,18 +530,46 @@ hashlimit_checkentry(const char *tablename, } static void -hashlimit_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +hashlimit_destroy(const struct xt_match *match, void *matchinfo) { struct ipt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } +#ifdef CONFIG_COMPAT +struct compat_ipt_hashlimit_info { + char name[IFNAMSIZ]; + struct hashlimit_cfg cfg; + compat_uptr_t hinfo; + compat_uptr_t master; +}; + +static void compat_from_user(void *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + memcpy(dst, src, off); + memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); +} + +static int compat_to_user(void __user *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + return copy_to_user(dst, src, off) ? -EFAULT : 0; +} +#endif + static struct ipt_match ipt_hashlimit = { .name = "hashlimit", .match = hashlimit_match, .matchsize = sizeof(struct ipt_hashlimit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_hashlimit_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .checkentry = hashlimit_checkentry, .destroy = hashlimit_destroy, .me = THIS_MODULE diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 5ac6ac023b5e..78c336f12a9e 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -56,7 +56,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_owner_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 61a2139f9cfd..126db44e71a8 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -35,20 +35,25 @@ static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; static unsigned int ip_list_hash_size = 0; static unsigned int ip_list_perms = 0644; +static unsigned int ip_list_uid = 0; +static unsigned int ip_list_gid = 0; module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); +module_param(ip_list_uid, uint, 0400); +module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); - +MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); struct recent_entry { struct list_head list; struct list_head lru_list; - u_int32_t addr; + __be32 addr; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; @@ -79,17 +84,17 @@ static struct file_operations recent_fops; static u_int32_t hash_rnd; static int hash_rnd_initted; -static unsigned int recent_entry_hash(u_int32_t addr) +static unsigned int recent_entry_hash(__be32 addr) { if (!hash_rnd_initted) { get_random_bytes(&hash_rnd, 4); hash_rnd_initted = 1; } - return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1); + return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl) +recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) { struct recent_entry *e; unsigned int h; @@ -110,7 +115,7 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) } static struct recent_entry * -recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl) +recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) { struct recent_entry *e; @@ -172,7 +177,7 @@ ipt_recent_match(const struct sk_buff *skb, const struct ipt_recent_info *info = matchinfo; struct recent_table *t; struct recent_entry *e; - u_int32_t addr; + __be32 addr; u_int8_t ttl; int ret = info->invert; @@ -232,7 +237,7 @@ out: static int ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -274,6 +279,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip, goto out; } t->proc->proc_fops = &recent_fops; + t->proc->uid = ip_list_uid; + t->proc->gid = ip_list_gid; t->proc->data = t; #endif spin_lock_bh(&recent_lock); @@ -286,8 +293,7 @@ out: } static void -ipt_recent_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +ipt_recent_destroy(const struct xt_match *match, void *matchinfo) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -399,7 +405,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - u_int32_t addr; + __be32 addr; int add; if (size > sizeof(buf)) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7f417484bfbf..e2e7dd8d7903 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 397b95cc026b..e62ea2bb9c0a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -10,7 +10,6 @@ * * Extended to all five netfilter hooks by Brad Chapman & Harald Welte */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netdevice.h> @@ -120,7 +119,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -132,7 +131,7 @@ ipt_local_hook(unsigned int hook, { unsigned int ret; u_int8_t tos; - u_int32_t saddr, daddr; + __be32 saddr, daddr; unsigned long nfmark; /* root is playing with raw sockets. */ @@ -149,7 +148,7 @@ ipt_local_hook(unsigned int hook, daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7912cce1e1b8..bcbeb4aeacd9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8cc8e1b36778..0af803df82b0 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -14,7 +14,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 663a73ee3f2f..790f00d500c3 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -25,7 +25,7 @@ #include <net/netfilter/nf_conntrack_protocol.h> #include <net/netfilter/nf_conntrack_core.h> -unsigned long nf_ct_icmp_timeout = 30*HZ; +unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d61e2a9d394d..9c6cbe3d9fb8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), + SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 291831e792af..05f5114828ea 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -32,7 +32,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bd221ec3f81e..b430cf2a4f66 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -38,8 +38,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include <linux/config.h> + #include <linux/types.h> #include <asm/atomic.h> #include <asm/byteorder.h> @@ -382,8 +381,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; - u32 daddr; - u32 saddr; + __be32 daddr; + __be32 saddr; u8 tos; int err; @@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) @@ -609,6 +609,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = skb->nh.iph->saddr; + sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); } if (inet->cmsg_flags) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cc9423de7311..c41ddba02e9d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -64,7 +64,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -105,6 +104,7 @@ #include <net/icmp.h> #include <net/xfrm.h> #include <net/ip_mp_alg.h> +#include <net/netevent.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -206,21 +206,27 @@ __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ + defined(CONFIG_PROVE_LOCKING) /* * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks * The size of this table is a power of two and depends on the number of CPUS. + * (on lockdep we have a quite big spinlock_t, so keep the size down there) */ -#if NR_CPUS >= 32 -#define RT_HASH_LOCK_SZ 4096 -#elif NR_CPUS >= 16 -#define RT_HASH_LOCK_SZ 2048 -#elif NR_CPUS >= 8 -#define RT_HASH_LOCK_SZ 1024 -#elif NR_CPUS >= 4 -#define RT_HASH_LOCK_SZ 512 +#ifdef CONFIG_LOCKDEP +# define RT_HASH_LOCK_SZ 256 #else -#define RT_HASH_LOCK_SZ 256 +# if NR_CPUS >= 32 +# define RT_HASH_LOCK_SZ 4096 +# elif NR_CPUS >= 16 +# define RT_HASH_LOCK_SZ 2048 +# elif NR_CPUS >= 8 +# define RT_HASH_LOCK_SZ 1024 +# elif NR_CPUS >= 4 +# define RT_HASH_LOCK_SZ 512 +# else +# define RT_HASH_LOCK_SZ 256 +# endif #endif static spinlock_t *rt_hash_locks; @@ -244,7 +250,7 @@ static unsigned int rt_hash_rnd; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ - (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++) + (__raw_get_cpu_var(rt_cache_stat).field++) static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); @@ -255,6 +261,10 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) & rt_hash_mask); } +#define rt_hash(daddr, saddr, idx) \ + rt_hash_code((__force u32)(__be32)(daddr),\ + (__force u32)(__be32)(saddr) ^ ((idx) << 5)) + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { int bucket; @@ -1068,7 +1078,7 @@ static void ip_select_fb_ident(struct iphdr *iph) u32 salt; spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id(ip_fallback_id ^ iph->daddr); + salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); iph->id = htons(salt & 0xFFFF); ip_fallback_id = salt; spin_unlock_bh(&ip_fb_id_lock); @@ -1112,14 +1122,15 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, - u32 saddr, struct net_device *dev) +void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, + __be32 saddr, struct net_device *dev) { int i, k; struct in_device *in_dev = in_dev_get(dev); struct rtable *rth, **rthp; - u32 skeys[2] = { saddr, 0 }; + __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct netevent_redirect netevent; if (!in_dev) return; @@ -1140,8 +1151,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash_code(daddr, - skeys[i] ^ (ikeys[k] << 5)); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); rthp=&rt_hash_table[hash].chain; @@ -1211,6 +1221,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } + + netevent.old = &rth->u.dst; + netevent.new = &rt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1248,9 +1263,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { - unsigned hash = rt_hash_code(rt->fl.fl4_dst, - rt->fl.fl4_src ^ - (rt->fl.oif << 5)); + unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + rt->fl.oif); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to " "%u.%u.%u.%u/%02x dropped\n", @@ -1385,15 +1399,15 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) int i; unsigned short old_mtu = ntohs(iph->tot_len); struct rtable *rth; - u32 skeys[2] = { iph->saddr, 0, }; - u32 daddr = iph->daddr; + __be32 skeys[2] = { iph->saddr, 0, }; + __be32 daddr = iph->daddr; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i]); + unsigned hash = rt_hash(daddr, skeys[i], 0); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1447,6 +1461,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1517,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb) void ip_rt_get_source(u8 *addr, struct rtable *rt) { - u32 src; + __be32 src; struct fib_result res; if (rt->fl.iif == 0) @@ -1583,12 +1598,12 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } -static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { unsigned hash; struct rtable *rth; - u32 spec_dst; + __be32 spec_dst; struct in_device *in_dev = in_dev_get(dev); u32 itag = 0; @@ -1652,7 +1667,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); + hash = rt_hash(daddr, saddr, dev->ifindex); return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); e_nobufs: @@ -1668,8 +1683,8 @@ e_inval: static void ip_handle_martian_source(struct net_device *dev, struct in_device *in_dev, struct sk_buff *skb, - u32 daddr, - u32 saddr) + __be32 daddr, + __be32 saddr) { RT_CACHE_STAT_INC(in_martian_src); #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -1699,7 +1714,7 @@ static void ip_handle_martian_source(struct net_device *dev, static inline int __mkroute_input(struct sk_buff *skb, struct fib_result* res, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos, + __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { @@ -1707,7 +1722,8 @@ static inline int __mkroute_input(struct sk_buff *skb, int err; struct in_device *out_dev; unsigned flags = 0; - u32 spec_dst, itag; + __be32 spec_dst; + u32 itag; /* get a working reference to the output device */ out_dev = in_dev_get(FIB_RES_DEV(*res)); @@ -1800,7 +1816,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1817,7 +1833,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash(daddr, saddr, fl->iif); return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } @@ -1825,7 +1841,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED struct rtable* rth = NULL, *rtres; @@ -1858,7 +1874,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash(daddr, saddr, fl->iif); err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -1888,7 +1904,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, * 2. IP spoofing attempts are filtered with 100% of guarantee. */ -static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; @@ -1907,7 +1923,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, u32 itag = 0; struct rtable * rth; unsigned hash; - u32 spec_dst; + __be32 spec_dst; int err = -EINVAL; int free_res = 0; @@ -1923,7 +1939,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) goto martian_source; - if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0)) + if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0)) goto brd_input; /* Accept zero addresses only to limited broadcast; @@ -2035,7 +2051,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); + hash = rt_hash(daddr, saddr, fl.iif); err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); goto done; @@ -2074,7 +2090,7 @@ martian_source: goto e_inval; } -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct rtable * rth; @@ -2082,7 +2098,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, int iif = dev->ifindex; tos &= IPTOS_RT_MASK; - hash = rt_hash_code(daddr, saddr ^ (iif << 5)); + hash = rt_hash(daddr, saddr, iif); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2156,7 +2172,7 @@ static inline int __mkroute_output(struct rtable **result, if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) return -EINVAL; - if (fl->fl4_dst == 0xFFFFFFFF) + if (fl->fl4_dst == htonl(0xFFFFFFFF)) res->type = RTN_BROADCAST; else if (MULTICAST(fl->fl4_dst)) res->type = RTN_MULTICAST; @@ -2280,8 +2296,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ (oldflp->oif << 5)); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); err = rt_intern_hash(hash, rth, rp); } @@ -2323,9 +2338,8 @@ static inline int ip_mkroute_output(struct rtable** rp, if (err != 0) goto cleanup; - hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ - (oldflp->oif << 5)); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, + oldflp->oif); err = rt_intern_hash(hash, rth, rp); /* forward hop information to multipath impl. */ @@ -2404,7 +2418,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) */ if (oldflp->oif == 0 - && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) { + && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2441,7 +2455,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) goto out; /* Wrong error code */ } - if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) { + if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) { if (!fl.fl4_src) fl.fl4_src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); @@ -2554,7 +2568,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) unsigned hash; struct rtable *rth; - hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2626,51 +2640,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, { struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct nlmsghdr *nlh; struct rta_cacheinfo ci; -#ifdef CONFIG_IP_MROUTE - struct rtattr *eptr; -#endif - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); - r = NLMSG_DATA(nlh); + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + if (nlh == NULL) + return -ENOBUFS; + + r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; + NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); + + NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); + if (rt->fl.fl4_src) { r->rtm_src_len = 32; - RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); + NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); + NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { - __u32 alg = rt->rt_multipath_alg; - - RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); - } + if (rt->rt_multipath_alg != IP_MP_ALG_NONE) + NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); #endif if (rt->fl.iif) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); + if (rt->rt_dst != rt->rt_gateway) - RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); + NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); + if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); ci.rta_used = rt->u.dst.__use; ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); @@ -2687,13 +2704,10 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } -#ifdef CONFIG_IP_MROUTE - eptr = (struct rtattr*)skb->tail; -#endif - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rt->fl.iif) { #ifdef CONFIG_IP_MROUTE - u32 dst = rt->rt_dst; + __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) { @@ -2702,41 +2716,48 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (!nowait) { if (err == 0) return 0; - goto nlmsg_failure; + goto nla_put_failure; } else { if (err == -EMSGSIZE) - goto nlmsg_failure; - ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; + goto nla_put_failure; + ci.rta_error = err; } } } else #endif - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; - u32 dst = 0; - u32 src = 0; - int iif = 0; - int err = -ENOBUFS; + __be32 dst = 0; + __be32 src = 0; + u32 iif; + int err; struct sk_buff *skb; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - goto out; + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2747,62 +2768,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - if (rta[RTA_SRC - 1]) - memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); - if (rta[RTA_DST - 1]) - memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); - if (rta[RTA_IIF - 1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); + src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; + iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { - struct net_device *dev = __dev_get_by_index(iif); - err = -ENODEV; - if (!dev) - goto out_free; + struct net_device *dev; + + dev = __dev_get_by_index(iif); + if (dev == NULL) { + err = -ENODEV; + goto errout_free; + } + skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = (struct rtable*)skb->dst; - if (!err && rt->u.dst.error) + + rt = (struct rtable*) skb->dst; + if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos } } }; - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = dst, + .saddr = src, + .tos = rtm->rtm_tos, + }, + }, + .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + }; err = ip_route_output_key(&rt, &fl); } + if (err) - goto out_free; + goto errout_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (!err) - goto out_free; - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } + if (err <= 0) + goto errout_free; - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: return err; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +errout: + return err; -out_free: +errout_free: kfree_skb(skb); - goto out; + goto errout; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -3130,13 +3150,9 @@ int __init ip_rt_init(void) } #endif - ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", - sizeof(struct rtable), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!ipv4_dst_ops.kmem_cachep) - panic("IP: failed to allocate ip_dst_cache\n"); + ipv4_dst_ops.kmem_cachep = + kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", @@ -3144,7 +3160,7 @@ int __init ip_rt_init(void) rhash_entries, (num_physpages >= 128 * 1024) ? 15 : 17, - HASH_HIGHMEM, + 0, &rt_hash_log, &rt_hash_mask, 0); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f67..661e0a4bca72 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ce4cd5f35511..e82a5be894b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <net/snmp.h> @@ -18,6 +17,7 @@ #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> +#include <net/cipso_ipv4.h> /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, return ret; } +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} + +late_initcall(tcp_congestion_default); ctl_table ipv4_table[] = { { @@ -698,6 +704,40 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#ifdef CONFIG_NETLABEL + { + .ctl_name = NET_CIPSOV4_CACHE_ENABLE, + .procname = "cipso_cache_enable", + .data = &cipso_v4_cache_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, + .procname = "cipso_cache_bucket_size", + .data = &cipso_v4_cache_bucketsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_OPTFMT, + .procname = "cipso_rbm_optfmt", + .data = &cipso_v4_rbm_optfmt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, + .procname = "cipso_rbm_strictvalid", + .data = &cipso_v4_rbm_strictvalid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* CONFIG_NETLABEL */ { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0e029c4e2903..66e9a729f6df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -247,7 +247,6 @@ * TCP_CLOSE socket is finished */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -269,7 +268,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> -int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; +int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; @@ -569,7 +568,7 @@ new_segment: skb->truesize += copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -643,7 +642,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp) int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { - if (sk->sk_route_caps & NETIF_F_TSO) + if (sk_can_gso(sk)) tmp = 0; else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -724,7 +723,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, tp, skb); copy = size_goal; @@ -956,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -1133,7 +1135,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else @@ -1660,7 +1662,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; @@ -2145,7 +2148,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; @@ -2166,15 +2169,36 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg) if (!pskb_may_pull(skb, thlen)) goto out; - oldlen = ~htonl(skb->len); + oldlen = (u16)~skb->len; __skb_pull(skb, thlen); - segs = skb_segment(skb, sg); + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + int mss; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + mss = skb_shinfo(skb)->gso_size; + skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + + segs = NULL; + goto out; + } + + segs = skb_segment(skb, features); if (IS_ERR(segs)) goto out; len = skb_shinfo(skb)->gso_size; - delta = csum_add(oldlen, htonl(thlen + len)); + delta = htonl(oldlen + (thlen + len)); skb = segs; th = skb->h.th; @@ -2183,10 +2207,10 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg) do { th->fin = th->psh = 0; - if (skb->ip_summed == CHECKSUM_NONE) { - th->check = csum_fold(csum_partial( - skb->h.raw, thlen, csum_add(skb->csum, delta))); - } + th->check = ~csum_fold(th->check + delta); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb->h.raw, thlen, + skb->csum)); seq += len; skb = skb->next; @@ -2196,15 +2220,16 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg) th->cwr = 0; } while (skb->next); - if (skb->ip_summed == CHECKSUM_NONE) { - delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw)); - th->check = csum_fold(csum_partial( - skb->h.raw, thlen, csum_add(skb->csum, delta))); - } + delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); + th->check = ~csum_fold(th->check + delta); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb->h.raw, thlen, + skb->csum)); out: return segs; } +EXPORT_SYMBOL(tcp_tso_segment); extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; @@ -2232,9 +2257,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!tcp_hashinfo.bind_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index b2d9021ad22b..5730333cd0ac 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -12,7 +12,6 @@ * this behaves the same as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -232,7 +231,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_compound.c b/net/ipv4/tcp_compound.c deleted file mode 100644 index bc54f7e9aea9..000000000000 --- a/net/ipv4/tcp_compound.c +++ /dev/null @@ -1,448 +0,0 @@ -/* - * TCP Vegas congestion control - * - * This is based on the congestion detection/avoidance scheme described in - * Lawrence S. Brakmo and Larry L. Peterson. - * "TCP Vegas: End to end congestion avoidance on a global internet." - * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480, - * October 1995. Available from: - * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps - * - * See http://www.cs.arizona.edu/xkernel/ for their implementation. - * The main aspects that distinguish this implementation from the - * Arizona Vegas implementation are: - * o We do not change the loss detection or recovery mechanisms of - * Linux in any way. Linux already recovers from losses quite well, - * using fine-grained timers, NewReno, and FACK. - * o To avoid the performance penalty imposed by increasing cwnd - * only every-other RTT during slow start, we increase during - * every RTT during slow start, just like Reno. - * o Largely to allow continuous cwnd growth during slow start, - * we use the rate at which ACKs come back as the "actual" - * rate, rather than the rate at which data is sent. - * o To speed convergence to the right rate, we set the cwnd - * to achieve the right ("actual") rate when we exit slow start. - * o To filter out the noise caused by delayed ACKs, we use the - * minimum RTT sample observed during the last RTT to calculate - * the actual rate. - * o When the sender re-starts from idle, it waits until it has - * received ACKs for an entire flight of new data before making - * a cwnd adjustment decision. The original Vegas implementation - * assumed senders never went idle. - * - * - * TCP Compound based on TCP Vegas - * - * further details can be found here: - * ftp://ftp.research.microsoft.com/pub/tr/TR-2005-86.pdf - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/inet_diag.h> - -#include <net/tcp.h> - -/* Default values of the Vegas variables, in fixed-point representation - * with V_PARAM_SHIFT bits to the right of the binary point. - */ -#define V_PARAM_SHIFT 1 - -#define TCP_COMPOUND_ALPHA 3U -#define TCP_COMPOUND_BETA 1U -#define TCP_COMPOUND_GAMMA 30 -#define TCP_COMPOUND_ZETA 1 - -/* TCP compound variables */ -struct compound { - u32 beg_snd_nxt; /* right edge during last RTT */ - u32 beg_snd_una; /* left edge during last RTT */ - u32 beg_snd_cwnd; /* saves the size of the cwnd */ - u8 doing_vegas_now; /* if true, do vegas for this RTT */ - u16 cntRTT; /* # of RTTs measured within last RTT */ - u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ - u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ - - u32 cwnd; - u32 dwnd; -}; - -/* There are several situations when we must "re-start" Vegas: - * - * o when a connection is established - * o after an RTO - * o after fast recovery - * o when we send a packet and there is no outstanding - * unacknowledged data (restarting an idle connection) - * - * In these circumstances we cannot do a Vegas calculation at the - * end of the first RTT, because any calculation we do is using - * stale info -- both the saved cwnd and congestion feedback are - * stale. - * - * Instead we must wait until the completion of an RTT during - * which we actually receive ACKs. - */ -static inline void vegas_enable(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct compound *vegas = inet_csk_ca(sk); - - /* Begin taking Vegas samples next time we send something. */ - vegas->doing_vegas_now = 1; - - /* Set the beginning of the next send window. */ - vegas->beg_snd_nxt = tp->snd_nxt; - - vegas->cntRTT = 0; - vegas->minRTT = 0x7fffffff; -} - -/* Stop taking Vegas samples for now. */ -static inline void vegas_disable(struct sock *sk) -{ - struct compound *vegas = inet_csk_ca(sk); - - vegas->doing_vegas_now = 0; -} - -static void tcp_compound_init(struct sock *sk) -{ - struct compound *vegas = inet_csk_ca(sk); - const struct tcp_sock *tp = tcp_sk(sk); - - vegas->baseRTT = 0x7fffffff; - vegas_enable(sk); - - vegas->dwnd = 0; - vegas->cwnd = tp->snd_cwnd; -} - -/* Do RTT sampling needed for Vegas. - * Basically we: - * o min-filter RTT samples from within an RTT to get the current - * propagation delay + queuing delay (we are min-filtering to try to - * avoid the effects of delayed ACKs) - * o min-filter RTT samples from a much longer window (forever for now) - * to find the propagation delay (baseRTT) - */ -static void tcp_compound_rtt_calc(struct sock *sk, u32 usrtt) -{ - struct compound *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ - - /* Filter to find propagation delay: */ - if (vrtt < vegas->baseRTT) - vegas->baseRTT = vrtt; - - /* Find the min RTT during the last RTT to find - * the current prop. delay + queuing delay: - */ - - vegas->minRTT = min(vegas->minRTT, vrtt); - vegas->cntRTT++; -} - -static void tcp_compound_state(struct sock *sk, u8 ca_state) -{ - - if (ca_state == TCP_CA_Open) - vegas_enable(sk); - else - vegas_disable(sk); -} - - -/* 64bit divisor, dividend and result. dynamic precision */ -static inline u64 div64_64(u64 dividend, u64 divisor) -{ - u32 d = divisor; - - if (divisor > 0xffffffffULL) { - unsigned int shift = fls(divisor >> 32); - - d = divisor >> shift; - dividend >>= shift; - } - - /* avoid 64 bit division if possible */ - if (dividend >> 32) - do_div(dividend, d); - else - dividend = (u32) dividend / d; - - return dividend; -} - -/* calculate the quartic root of "a" using Newton-Raphson */ -static u32 qroot(u64 a) -{ - u32 x, x1; - - /* Initial estimate is based on: - * qrt(x) = exp(log(x) / 4) - */ - x = 1u << (fls64(a) >> 2); - - /* - * Iteration based on: - * 3 - * x = ( 3 * x + a / x ) / 4 - * k+1 k k - */ - do { - u64 x3 = x; - - x1 = x; - x3 *= x; - x3 *= x; - - x = (3 * x + (u32) div64_64(a, x3)) / 4; - } while (abs(x1 - x) > 1); - - return x; -} - - -/* - * If the connection is idle and we are restarting, - * then we don't want to do any Vegas calculations - * until we get fresh RTT samples. So when we - * restart, we reset our Vegas state to a clean - * slate. After we get acks for this flight of - * packets, _then_ we can make Vegas calculations - * again. - */ -static void tcp_compound_cwnd_event(struct sock *sk, enum tcp_ca_event event) -{ - if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) - tcp_compound_init(sk); -} - -static void tcp_compound_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct compound *vegas = inet_csk_ca(sk); - u8 inc = 0; - - if (vegas->cwnd + vegas->dwnd > tp->snd_cwnd) { - if (vegas->cwnd > tp->snd_cwnd || vegas->dwnd > tp->snd_cwnd) { - vegas->cwnd = tp->snd_cwnd; - vegas->dwnd = 0; - } else - vegas->cwnd = tp->snd_cwnd - vegas->dwnd; - - } - - if (!tcp_is_cwnd_limited(sk, in_flight)) - return; - - if (vegas->cwnd <= tp->snd_ssthresh) - inc = 1; - else if (tp->snd_cwnd_cnt < tp->snd_cwnd) - tp->snd_cwnd_cnt++; - - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - inc = 1; - tp->snd_cwnd_cnt = 0; - } - - if (inc && tp->snd_cwnd < tp->snd_cwnd_clamp) - vegas->cwnd++; - - /* The key players are v_beg_snd_una and v_beg_snd_nxt. - * - * These are so named because they represent the approximate values - * of snd_una and snd_nxt at the beginning of the current RTT. More - * precisely, they represent the amount of data sent during the RTT. - * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, - * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding - * bytes of data have been ACKed during the course of the RTT, giving - * an "actual" rate of: - * - * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) - * - * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, - * because delayed ACKs can cover more than one segment, so they - * don't line up nicely with the boundaries of RTTs. - * - * Another unfortunate fact of life is that delayed ACKs delay the - * advance of the left edge of our send window, so that the number - * of bytes we send in an RTT is often less than our cwnd will allow. - * So we keep track of our cwnd separately, in v_beg_snd_cwnd. - */ - - if (after(ack, vegas->beg_snd_nxt)) { - /* Do the Vegas once-per-RTT cwnd adjustment. */ - u32 old_wnd, old_snd_cwnd; - - /* Here old_wnd is essentially the window of data that was - * sent during the previous RTT, and has all - * been acknowledged in the course of the RTT that ended - * with the ACK we just received. Likewise, old_snd_cwnd - * is the cwnd during the previous RTT. - */ - if (!tp->mss_cache) - return; - - old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) / - tp->mss_cache; - old_snd_cwnd = vegas->beg_snd_cwnd; - - /* Save the extent of the current window so we can use this - * at the end of the next RTT. - */ - vegas->beg_snd_una = vegas->beg_snd_nxt; - vegas->beg_snd_nxt = tp->snd_nxt; - vegas->beg_snd_cwnd = tp->snd_cwnd; - - /* We do the Vegas calculations only if we got enough RTT - * samples that we can be reasonably sure that we got - * at least one RTT sample that wasn't from a delayed ACK. - * If we only had 2 samples total, - * then that means we're getting only 1 ACK per RTT, which - * means they're almost certainly delayed ACKs. - * If we have 3 samples, we should be OK. - */ - - if (vegas->cntRTT > 2) { - u32 rtt, target_cwnd, diff; - u32 brtt, dwnd; - - /* We have enough RTT samples, so, using the Vegas - * algorithm, we determine if we should increase or - * decrease cwnd, and by how much. - */ - - /* Pluck out the RTT we are using for the Vegas - * calculations. This is the min RTT seen during the - * last RTT. Taking the min filters out the effects - * of delayed ACKs, at the cost of noticing congestion - * a bit later. - */ - rtt = vegas->minRTT; - - /* Calculate the cwnd we should have, if we weren't - * going too fast. - * - * This is: - * (actual rate in segments) * baseRTT - * We keep it as a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary point. - */ - if (!rtt) - return; - - brtt = vegas->baseRTT; - target_cwnd = ((old_wnd * brtt) - << V_PARAM_SHIFT) / rtt; - - /* Calculate the difference between the window we had, - * and the window we would like to have. This quantity - * is the "Diff" from the Arizona Vegas papers. - * - * Again, this is a fixed point number with - * V_PARAM_SHIFT bits to the right of the binary - * point. - */ - - diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; - - dwnd = vegas->dwnd; - - if (diff < (TCP_COMPOUND_GAMMA << V_PARAM_SHIFT)) { - u64 v; - u32 x; - - /* - * The TCP Compound paper describes the choice - * of "k" determines the agressiveness, - * ie. slope of the response function. - * - * For same value as HSTCP would be 0.8 - * but for computaional reasons, both the - * original authors and this implementation - * use 0.75. - */ - v = old_wnd; - x = qroot(v * v * v) >> TCP_COMPOUND_ALPHA; - if (x > 1) - dwnd = x - 1; - else - dwnd = 0; - - dwnd += vegas->dwnd; - - } else if ((dwnd << V_PARAM_SHIFT) < - (diff * TCP_COMPOUND_BETA)) - dwnd = 0; - else - dwnd = - ((dwnd << V_PARAM_SHIFT) - - (diff * - TCP_COMPOUND_BETA)) >> V_PARAM_SHIFT; - - vegas->dwnd = dwnd; - - } - - /* Wipe the slate clean for the next RTT. */ - vegas->cntRTT = 0; - vegas->minRTT = 0x7fffffff; - } - - tp->snd_cwnd = vegas->cwnd + vegas->dwnd; -} - -/* Extract info for Tcp socket info provided via netlink. */ -static void tcp_compound_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) -{ - const struct compound *ca = inet_csk_ca(sk); - if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info *info; - - info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, - sizeof(*info))); - - info->tcpv_enabled = ca->doing_vegas_now; - info->tcpv_rttcnt = ca->cntRTT; - info->tcpv_rtt = ca->baseRTT; - info->tcpv_minrtt = ca->minRTT; - rtattr_failure:; - } -} - -static struct tcp_congestion_ops tcp_compound = { - .init = tcp_compound_init, - .ssthresh = tcp_reno_ssthresh, - .cong_avoid = tcp_compound_cong_avoid, - .rtt_sample = tcp_compound_rtt_calc, - .set_state = tcp_compound_state, - .cwnd_event = tcp_compound_cwnd_event, - .get_info = tcp_compound_get_info, - - .owner = THIS_MODULE, - .name = "compound", -}; - -static int __init tcp_compound_register(void) -{ - BUG_ON(sizeof(struct compound) > ICSK_CA_PRIV_SIZE); - tcp_register_congestion_control(&tcp_compound); - return 0; -} - -static void __exit tcp_compound_unregister(void) -{ - tcp_unregister_congestion_control(&tcp_compound); -} - -module_init(tcp_compound_register); -module_exit(tcp_compound_unregister); - -MODULE_AUTHOR("Angelo P. Castellani, Stephen Hemminger"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("TCP Compound"); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 857eefc52aab..af0aca1e6be6 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -6,7 +6,6 @@ * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> @@ -49,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) printk(KERN_NOTICE "TCP %s already registered\n", ca->name); ret = -EEXIST; } else { - list_add_rcu(&ca->list, &tcp_cong_list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); printk(KERN_INFO "TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); @@ -190,7 +189,7 @@ void tcp_slow_start(struct tcp_sock *tp) return; /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) { if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 78b7a6b9e4de..a60ef38d75c6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -12,7 +12,6 @@ * this behaves the same as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -359,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = { static int __init cubictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index c148c1081880..57c5f0b10e6c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/inet_diag.h> @@ -26,7 +25,10 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + if (sk->sk_state == TCP_LISTEN) + r->idiag_rqueue = sk->sk_ack_backlog; + else + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->idiag_wqueue = tp->write_seq - tp->snd_una; if (info != NULL) tcp_get_info(sk, info); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 1120245b2373..c4fc811bf377 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -6,7 +6,6 @@ * John Heffner <jheffner@psc.edu> */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> @@ -140,14 +139,19 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, tp->snd_cwnd++; } } else { - /* Update AIMD parameters */ + /* Update AIMD parameters. + * + * We want to guarantee that: + * hstcp_aimd_vals[ca->ai-1].cwnd < + * snd_cwnd <= + * hstcp_aimd_vals[ca->ai].cwnd + */ if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; - } else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) { - while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && - ca->ai > 0) + } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) { + while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } @@ -185,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 3d92c1859267..682e7d5b6f2f 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -6,7 +6,6 @@ * http://www.hamilton.ie/net/htcp3.pdf */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -287,7 +286,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); return tcp_register_congestion_control(&htcp); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 40dbb3877510..59e691d26f64 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -10,7 +10,6 @@ * root at danielinux.net */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> @@ -171,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 94fe5b1f9dcb..3f884cea14ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -63,7 +63,6 @@ * Pasi Sarolahti: F-RTO for dealing with spurious RTOs */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> @@ -73,24 +72,24 @@ #include <asm/unaligned.h> #include <net/netdma.h> -int sysctl_tcp_timestamps = 1; -int sysctl_tcp_window_scaling = 1; -int sysctl_tcp_sack = 1; -int sysctl_tcp_fack = 1; -int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; -int sysctl_tcp_ecn; -int sysctl_tcp_dsack = 1; -int sysctl_tcp_app_win = 31; -int sysctl_tcp_adv_win_scale = 2; - -int sysctl_tcp_stdurg; -int sysctl_tcp_rfc1337; -int sysctl_tcp_max_orphans = NR_FILE; -int sysctl_tcp_frto; -int sysctl_tcp_nometrics_save; - -int sysctl_tcp_moderate_rcvbuf = 1; -int sysctl_tcp_abc = 1; +int sysctl_tcp_timestamps __read_mostly = 1; +int sysctl_tcp_window_scaling __read_mostly = 1; +int sysctl_tcp_sack __read_mostly = 1; +int sysctl_tcp_fack __read_mostly = 1; +int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +int sysctl_tcp_ecn __read_mostly; +int sysctl_tcp_dsack __read_mostly = 1; +int sysctl_tcp_app_win __read_mostly = 31; +int sysctl_tcp_adv_win_scale __read_mostly = 2; + +int sysctl_tcp_stdurg __read_mostly; +int sysctl_tcp_rfc1337 __read_mostly; +int sysctl_tcp_max_orphans __read_mostly = NR_FILE; +int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_nometrics_save __read_mostly; + +int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; +int sysctl_tcp_abc __read_mostly; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -128,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ - len = skb->len; + len = skb_shinfo(skb)->gso_size ?: skb->len; if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = len; } else { @@ -157,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, return; } } + if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } @@ -934,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; - struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); + struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; @@ -2238,13 +2239,12 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static u32 tcp_usrtt(const struct sk_buff *skb) +static u32 tcp_usrtt(struct timeval *tv) { - struct timeval tv, now; + struct timeval now; do_gettimeofday(&now); - skb_get_timestamp(skb, &tv); - return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); + return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); } /* Remove acknowledged frames from the retransmission queue. */ @@ -2259,6 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) u32 pkts_acked = 0; void (*rtt_sample)(struct sock *sk, u32 usrtt) = icsk->icsk_ca_ops->rtt_sample; + struct timeval tv; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { @@ -2307,8 +2308,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) seq_rtt = -1; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - if (rtt_sample) - (*rtt_sample)(sk, tcp_usrtt(skb)); + skb_get_timestamp(skb, &tv); } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); @@ -2321,8 +2321,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - if (rtt_sample) - (*rtt_sample)(sk, tcp_usrtt(skb)); + skb_get_timestamp(skb, &tv); } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); @@ -2334,6 +2333,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) if (acked&FLAG_ACKED) { tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk, tp); + if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) + (*rtt_sample)(sk, tcp_usrtt(&tv)); if (icsk->icsk_ca_ops->pkts_acked) icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); @@ -2506,8 +2507,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) goto old_ack; - if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) - tp->bytes_acked += ack - prior_snd_una; + if (sysctl_tcp_abc) { + if (icsk->icsk_ca_state < TCP_CA_CWR) + tp->bytes_acked += ack - prior_snd_una; + else if (icsk->icsk_ca_state == TCP_CA_Loss) + /* we assume just one segment left network */ + tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); + } if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. @@ -2623,7 +2629,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, switch(opcode) { case TCPOPT_MSS: if(opsize==TCPOLEN_MSS && th->syn && !estab) { - u16 in_mss = ntohs(get_unaligned((__u16 *)ptr)); + u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); if (in_mss) { if (opt_rx->user_mss && opt_rx->user_mss < in_mss) in_mss = opt_rx->user_mss; @@ -2651,8 +2657,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, if ((estab && opt_rx->tstamp_ok) || (!estab && sysctl_tcp_timestamps)) { opt_rx->saw_tstamp = 1; - opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr)); - opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4))); + opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); + opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); } } break; @@ -2689,8 +2695,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 0; } else if (tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __u32 *ptr = (__u32 *)(th + 1); - if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + __be32 *ptr = (__be32 *)(th + 1); + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { tp->rx_opt.saw_tstamp = 1; ++ptr; @@ -3542,7 +3548,8 @@ void tcp_cwnd_application_limited(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ - u32 win_used = max(tp->snd_cwnd_used, 2U); + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; @@ -3904,10 +3911,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Check timestamp */ if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __u32 *ptr = (__u32 *)(th + 1); + __be32 *ptr = (__be32 *)(th + 1); /* No? Slow path! */ - if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) goto slow_path; @@ -4178,8 +4185,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, */ TCP_ECN_rcv_synack(tp, th); - if (tp->ecn_flags&TCP_ECN_OK) - sock_set_flag(sk, SOCK_NO_LARGESEND); tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tcp_ack(sk, skb, FLAG_SLOWPATH); @@ -4322,8 +4327,6 @@ discard: tp->max_window = tp->snd_wnd; TCP_ECN_rcv_syn(tp, th); - if (tp->ecn_flags&TCP_ECN_OK) - sock_set_flag(sk, SOCK_NO_LARGESEND); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 25ecc6e2478b..c83938b8fcb1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -52,7 +52,6 @@ * a single port at the same time. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -79,8 +78,8 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> -int sysctl_tcp_tw_reuse; -int sysctl_tcp_low_latency; +int sysctl_tcp_tw_reuse __read_mostly; +int sysctl_tcp_low_latency __read_mostly; /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 @@ -91,7 +90,7 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), .lhash_users = ATOMIC_INIT(0), .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), }; @@ -160,7 +159,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; @@ -242,6 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ + sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) @@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) It can f.e. if SYNs crossed. */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -485,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -496,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = skb->nh.iph; + th = skb->h.th; + + th->check = 0; + th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} + /* * This routine will send an RST to the other tcp. * @@ -717,8 +734,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct inet_request_sock *ireq; struct tcp_options_received tmp_opt; struct request_sock *req; - __u32 saddr = skb->nh.iph->saddr; - __u32 daddr = skb->nh.iph->daddr; + __be32 saddr = skb->nh.iph->saddr; + __be32 daddr = skb->nh.iph->daddr; __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES @@ -781,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; @@ -856,7 +876,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; } @@ -884,6 +903,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; + newsk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(newsk, dst); newtp = tcp_sk(newsk); @@ -931,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, - th->source, skb->nh.iph->daddr, - ntohs(th->dest), inet_iif(skb)); + nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -953,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1070,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), + skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (!sk) @@ -1084,12 +1104,12 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; - bh_lock_sock(sk); + bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { #ifdef CONFIG_NET_DMA @@ -1148,7 +1168,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, - ntohs(th->dest), + th->dest, inet_iif(skb)); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, @@ -1621,10 +1641,9 @@ static int tcp_seq_open(struct inode *inode, struct file *file) if (unlikely(afinfo == NULL)) return -EINVAL; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; - memset(s, 0, sizeof(*s)); s->family = afinfo->family; s->seq_ops.start = tcp_seq_start; s->seq_ops.next = tcp_seq_next; @@ -1726,7 +1745,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5d %8d %lu %d %p %u %u %u %u %d", i, src, srcp, dest, destp, sp->sk_state, - tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, + tp->write_seq - tp->snd_una, + (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -1743,7 +1763,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { - unsigned int dest, src; + __be32 dest, src; __u16 destp, srcp; int ttd = tw->tw_ttd - jiffies; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 1f977b6ee9a1..f0ebaf0e21cb 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -3,13 +3,8 @@ * * TCP Low Priority is a distributed algorithm whose goal is to utilize only * the excess network bandwidth as compared to the ``fair share`` of - * bandwidth as targeted by TCP. Available from: - * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * bandwidth as targeted by TCP. * - * Original Author: - * Aleksandar Kuzmanovic <akuzma@northwestern.edu> - * - * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation. * As of 2.6.13, Linux supports pluggable congestion control algorithms. * Due to the limitation of the API, we take the following changes from * the original TCP-LP implementation: @@ -24,14 +19,20 @@ * o OWD is handled in relative format, where local time stamp will in * tcp_time_stamp format. * - * Port from 2.4.19 to 2.6.16 as module by: - * Wong Hoi Sing Edison <hswong3i@gmail.com> - * Hung Hing Lun <hlhung3i@gmail.com> + * Original Author: + * Aleksandar Kuzmanovic <akuzma@northwestern.edu> + * Available from: + * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * Original implementation for 2.4.19: + * http://www-ece.rice.edu/networks/TCP-LP/ * - * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $ + * 2.6.x module Authors: + * Wong Hoi Sing, Edison <hswong3i@gmail.com> + * Hung Hing Lun, Mike <hlhung3i@gmail.com> + * SourceForge project page: + * http://tcp-lp-mod.sourceforge.net/ */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> @@ -153,16 +154,19 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) if (m < 0) m = -m; - if (rhz != 0) { + if (rhz > 0) { m -= rhz >> 6; /* m is now error in remote HZ est */ rhz += m; /* 63/64 old + 1/64 new */ } else rhz = m << 6; + out: /* record time for successful remote HZ calc */ - lp->flag |= LP_VALID_RHZ; + if ((rhz >> 6) > 0) + lp->flag |= LP_VALID_RHZ; + else + lp->flag &= ~LP_VALID_RHZ; - out: /* record reference time stamp */ lp->remote_ref_time = tp->rx_opt.rcv_tsval; lp->local_ref_time = tp->rx_opt.rcv_tsecr; @@ -321,7 +325,7 @@ static struct tcp_congestion_ops tcp_lp = { static int __init tcp_lp_register(void) { - BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_lp); } @@ -333,6 +337,6 @@ static void __exit tcp_lp_unregister(void) module_init(tcp_lp_register); module_exit(tcp_lp_unregister); -MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun"); +MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Low Priority"); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2b9b7f6c7f7c..0163d9826907 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -20,7 +20,6 @@ * Jorge Cwik, <jorge@laser.satlink.net> */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> @@ -35,13 +34,13 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_syncookies = SYNC_INIT; -int sysctl_tcp_abort_on_overflow; +int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; +int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = SPIN_LOCK_UNLOCKED, + .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&tcp_death_row), @@ -440,8 +439,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); - if (newtp->ecn_flags&TCP_ECN_OK) - sock_set_flag(newsk, SOCK_NO_LARGESEND); TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); } @@ -592,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; + } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdd71db8bf90..9a253faefc81 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -43,24 +43,24 @@ #include <linux/smp_lock.h> /* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse = 1; +int sysctl_tcp_retrans_collapse __read_mostly = 1; /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ -int sysctl_tcp_workaround_signed_windows = 0; +int sysctl_tcp_workaround_signed_windows __read_mostly = 0; /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. */ -int sysctl_tcp_tso_win_divisor = 3; +int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing = 0; -int sysctl_tcp_base_mss = 512; +int sysctl_tcp_mtu_probing __read_mostly = 0; +int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle = 1; +int sysctl_tcp_slow_start_after_idle __read_mostly = 1; static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) @@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * See RFC1323 for an explanation of the limit to 14 */ space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; @@ -268,7 +269,7 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } -static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, +static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp) { if (tp->rx_opt.tstamp_ok) { @@ -304,7 +305,7 @@ static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, * MAX_SYN_SIZE to match the new maximum number of options that you * can generate. */ -static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, +static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) { @@ -423,7 +424,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->dest = inet->dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); - *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | + *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { @@ -444,7 +445,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_syn_build_options((__u32 *)(th + 1), + tcp_syn_build_options((__be32 *)(th + 1), tcp_advertise_mss(sk), (sysctl_flags & SYSCTL_FLAG_TSTAMPS), (sysctl_flags & SYSCTL_FLAG_SACK), @@ -453,7 +454,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb->when, tp->rx_opt.ts_recent); } else { - tcp_build_and_update_options((__u32 *)(th + 1), + tcp_build_and_update_options((__be32 *)(th + 1), tp, tcb->when); TCP_ECN_send(sk, tp, skb, tcp_header_size); } @@ -466,7 +467,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -510,8 +512,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || - !(sk->sk_route_caps & NETIF_F_TSO)) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -525,7 +526,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned factor /= mss_now; skb_shinfo(skb)->gso_segs = factor; skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } @@ -576,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), nsize, 0); @@ -585,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); } @@ -688,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->truesize -= len; sk->sk_wmem_queued -= len; @@ -824,9 +825,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && - (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) doing_tso = 1; if (dst) { @@ -1063,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; - buff->ip_summed = skb->ip_summed = CHECKSUM_HW; + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ @@ -1207,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - if (skb->ip_summed == CHECKSUM_HW) - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = skb->ip_summed; len = 0; while (len < probe_size) { @@ -1232,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); @@ -1573,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - if (next_skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = next_skb->ip_summed; - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ @@ -2044,8 +2041,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - if (dst->dev->features&NETIF_F_TSO) - ireq->ecn_ok = 0; TCP_ECN_make_synack(req, th); th->source = inet_sk(sk)->sport; th->dest = ireq->rmt_port; @@ -2075,7 +2070,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(req->rcv_wnd); TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, + tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, TCP_SKB_CB(skb)->when, req->ts_recent); @@ -2162,10 +2157,9 @@ int tcp_connect(struct sock *sk) skb_shinfo(buff)->gso_size = 0; skb_shinfo(buff)->gso_type = 0; buff->csum = 0; + tp->snd_nxt = tp->write_seq; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2175,6 +2169,12 @@ int tcp_connect(struct sock *sk) sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + + /* We change tp->snd_nxt after the tcp_transmit_skb() call + * in order to make this packet get counted in tcpOutSegs. + */ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d7d517a3a238..dab37d2f65fc 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt; + int error = 0, cnt = 0; unsigned char *tbuf; if (!buf || len < 0) @@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, error = wait_event_interruptible(tcpw.wait, __kfifo_len(tcpw.fifo) != 0); if (error) - return error; + goto out_free; cnt = kfifo_get(tcpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt); +out_free: vfree(tbuf); return error ? error : cnt; diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 26d7486ee501..4624501e9680 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -5,7 +5,6 @@ * John Heffner <jheffner@sc.edu> */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7c1bde3cd6cb..fb09ade5897b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -23,14 +23,14 @@ #include <linux/module.h> #include <net/tcp.h> -int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; -int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; -int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; -int sysctl_tcp_retries1 = TCP_RETR1; -int sysctl_tcp_retries2 = TCP_RETR2; -int sysctl_tcp_orphan_retries; +int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; +int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; +int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; +int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; +int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; +int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; +int sysctl_tcp_orphan_retries __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 3b7403495052..a3b7aa015a2f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -31,7 +31,6 @@ * assumed senders never went idle. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -371,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 11b42a7135c1..ce57bf302f6c 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -9,7 +9,6 @@ * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = { static int __init tcp_veno_register(void) { - BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_veno); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 4247da1384bf..4f42a86c77f3 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -21,7 +21,6 @@ * ssthresh after packet loss. The probing phase is as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -290,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3f93292b0ad8..6d6142f9c478 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -91,7 +91,6 @@ #include <linux/errno.h> #include <linux/timer.h> #include <linux/mm.h> -#include <linux/config.h> #include <linux/inet.h> #include <linux/ipv6.h> #include <linux/netdevice.h> @@ -119,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -/* Shared by v4/v6 udp. */ -int udp_port_rover; +static int udp_port_rover; -static int udp_v4_get_port(struct sock *sk, unsigned short snum) +static inline int udp_lport_inuse(u16 num) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + if (inet_sk(sk)->num == num) + return 1; + return 0; +} + +/** + * udp_get_port - common port lookup for IPv4 and IPv6 + * + * @sk: socket struct in question + * @snum: port number to look up + * @saddr_comp: AF-dependent comparison of bound local IP addresses + */ +int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) { struct hlist_node *node; + struct hlist_head *head; struct sock *sk2; - struct inet_sock *inet = inet_sk(sk); + int error = 1; write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -138,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct hlist_head *list; int size; - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { + head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -150,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) goto gotit; } size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; + sk_for_each(sk2, node, head) + if (++size < best_size_so_far) { + best_size_so_far = size; + best = result; + } } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -171,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) gotit: udp_port_rover = snum = result; } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet2 = inet_sk(sk2); - - if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && - (!sk2->sk_reuse || !sk->sk_reuse)) + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + + sk_for_each(sk2, node, head) + if (inet_sk(sk2)->num == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_cmp)(sk, sk2) ) goto fail; - } } - inet->num = snum; + inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_add_node(sk, h); + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } - write_unlock_bh(&udp_hash_lock); - return 0; - + error = 0; fail: write_unlock_bh(&udp_hash_lock); - return 1; + return error; +} + +static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); } +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + + static void udp_v4_hash(struct sock *sk) { BUG(); @@ -221,8 +243,8 @@ static void udp_v4_unhash(struct sock *sk) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) +static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -266,8 +288,8 @@ static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, return result; } -static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) +static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) { struct sock *sk; @@ -280,8 +302,8 @@ static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, } static inline struct sock *udp_v4_mcast_next(struct sock *sk, - u16 loc_port, u32 loc_addr, - u16 rmt_port, u32 rmt_addr, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, int dif) { struct hlist_node *node; @@ -430,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) /* * Only one fragment on the socket. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb->csum = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, IPPROTO_UDP, 0); @@ -449,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) * fragments on the socket so that all csums of sk_buffs * should be together. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = (unsigned char *)uh - skb->data; skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -476,7 +498,7 @@ out: } -static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base) +static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) { return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); } @@ -491,8 +513,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct rtable *rt = NULL; int free = 0; int connected = 0; - u32 daddr, faddr, saddr; - u16 dport; + __be32 daddr, faddr, saddr; + __be16 dport; u8 tos; int err; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; @@ -604,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -662,6 +685,16 @@ out: UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: @@ -898,7 +931,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) int iphlen, len; __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); - __u32 *udpdata32 = (__u32 *)udpdata; + __be32 *udpdata32 = (__be32 *)udpdata; __u16 encap_type = up->encap_type; /* if we're overly short, let UDP handle it */ @@ -981,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); + int rc; /* * Charge it to the socket, dropping if the queue is full. @@ -1027,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; @@ -1043,7 +1080,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) * so we don't need to lock the hashes. */ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, - u32 saddr, u32 daddr) + __be32 saddr, __be32 daddr) { struct sock *sk; int dif; @@ -1084,11 +1121,11 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * including udp header and folding it to skb->csum. */ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) + unsigned short ulen, __be32 saddr, __be32 daddr) { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_HW) { + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -1109,8 +1146,8 @@ int udp_rcv(struct sk_buff *skb) struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; - u32 saddr = skb->nh.iph->saddr; - u32 daddr = skb->nh.iph->daddr; + __be32 saddr = skb->nh.iph->saddr; + __be32 daddr = skb->nh.iph->daddr; int len = skb->len; /* @@ -1469,11 +1506,10 @@ static int udp_seq_open(struct inode *inode, struct file *file) struct udp_seq_afinfo *afinfo = PDE(inode)->data; struct seq_file *seq; int rc = -ENOMEM; - struct udp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; - memset(s, 0, sizeof(*s)); s->family = afinfo->family; s->seq_ops.start = udp_seq_start; s->seq_ops.next = udp_seq_next; @@ -1527,8 +1563,8 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) { struct inet_sock *inet = inet_sk(sp); - unsigned int dest = inet->daddr; - unsigned int src = inet->rcv_saddr; + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); @@ -1583,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_port_rover); +EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a6..8655d038364c 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,7 +23,7 @@ int xfrm4_rcv(struct sk_buff *skb) EXPORT_SYMBOL(xfrm4_rcv); -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { switch (nexthdr) { case IPPROTO_IPIP: @@ -55,7 +55,7 @@ drop: int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; - u32 spi, seq; + __be32 spi, seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index a9e6b3dd19c9..92676b7e4034 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -21,9 +21,8 @@ * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_transport_output(struct sk_buff *skb) +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x; struct iphdr *iph; int ihl; @@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb) ihl = iph->ihl * 4; skb->h.raw += ihl; - x = skb->dst->xfrm; skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index f8d880beb12f..e23c21d31a53 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_tunnel_output(struct sk_buff *skb) +static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; int flags; @@ -92,7 +91,6 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); err = 0; out: diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 193363e22932..04403fb01a58 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; @@ -134,7 +134,7 @@ static int xfrm4_output_finish(struct sk_buff *skb) } #endif - if (!skb_shinfo(skb)->gso_size) + if (!skb_is_gso(skb)) return xfrm4_output_finish2(skb); skb->protocol = htons(ETH_P_IP); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c0465284dfac..7a7a00147e55 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -9,7 +9,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/inetdevice.h> #include <net/xfrm.h> #include <net/ip.h> @@ -22,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } +static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, + }, + }, + }; + + if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + saddr->a4 = rt->rt_src; + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -34,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } @@ -94,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; @@ -136,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -201,7 +221,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_ESP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u32 *ehdr = (u32 *)xprth; + __be32 *ehdr = (__be32 *)xprth; fl->fl_ipsec_spi = ehdr[0]; } @@ -209,7 +229,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_AH: if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - u32 *ah_hdr = (u32*)xprth; + __be32 *ah_hdr = (__be32*)xprth; fl->fl_ipsec_spi = ah_hdr[1]; } @@ -217,7 +237,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_COMP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u16 *ipcomp_hdr = (u16 *)xprth; + __be16 *ipcomp_hdr = (__be16 *)xprth; fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } @@ -297,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966e..3cc3df0c6ece 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -29,9 +29,9 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.daddr.a4 = fl->fl4_dst; x->sel.saddr.a4 = fl->fl4_src; x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = ~0; + x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = ~0; + x->sel.sport_mask = htons(0xffff); x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; @@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = x->id.daddr.a4, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET)) { - x->props.saddr.a4 = rt->rt_src; - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; } -static struct xfrm_state * -__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm4_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET && - spi == x->id.spi && - daddr->a4 == x->id.daddr.a4 && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -static struct xfrm_state * -__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm4_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET && - daddr->a4 == x->id.daddr.a4 && - mode == x->props.mode && - proto == x->id.proto && - saddr->a4 == x->props.saddr.a4 && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - x0->sel.daddr.a4 = daddr->a4; - x0->sel.saddr.a4 = saddr->a4; - x0->sel.prefixlen_d = 32; - x0->sel.prefixlen_s = 32; - x0->props.saddr.a4 = saddr->a4; - x0->km.state = XFRM_STATE_ACQ; - x0->id.daddr.a4 = daddr->a4; - x0->id.proto = proto; - x0->props.family = AF_INET; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->props.family = AF_INET; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, - .state_lookup = __xfrm4_state_lookup, - .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c83..f110af5b1319 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e923d4dea418..a2d211da2aba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -77,6 +77,7 @@ config INET6_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- @@ -97,6 +98,15 @@ config INET6_IPCOMP If unsure, say Y. +config IPV6_MIP6 + bool "IPv6: Mobility (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + select XFRM + ---help--- + Support for IPv6 Mobility described in RFC 3775. + + If unsure, say N. + config INET6_XFRM_TUNNEL tristate select INET6_TUNNEL @@ -126,6 +136,13 @@ config INET6_XFRM_MODE_TUNNEL If unsure, say Y. +config INET6_XFRM_MODE_ROUTEOPTIMIZATION + tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + select XFRM + ---help--- + Support for MIPv6 route optimization mode. + config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL @@ -135,3 +152,31 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_SUBTREES + bool "IPv6: source address based routing" + depends on IPV6 && EXPERIMENTAL + ---help--- + Enable routing by source address or prefix. + + The destination address is still the primary routing key, so mixing + normal and source prefix specific routes in the same routing table + may sometimes lead to unintended routing behavior. This can be + avoided by defining different routing tables for the normal and + source prefix specific routes. + + If unsure, say N. + +config IPV6_MULTIPLE_TABLES + bool "IPv6: Multiple Routing Tables" + depends on IPV6 && EXPERIMENTAL + select FIB_RULES + ---help--- + Support multiple routing tables. + +config IPV6_ROUTE_FWMARK + bool "IPv6: use netfilter MARK value as routing key" + depends on IPV6_MULTIPLE_TABLES && NETFILTER + ---help--- + If you say Y here, you will be able to specify different routes for + packets with different mark values (see iptables(8), MARK target). + diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 386e0a626948..0213c6612b58 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -13,6 +13,9 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o +ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o +ipv6-$(CONFIG_IPV6_MIP6) += mip6.o + ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o @@ -22,6 +25,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o +obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4da664538f52..e03c33b2465b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -40,7 +40,6 @@ * status etc. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -49,6 +48,7 @@ #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/if_arcnet.h> #include <linux/if_infiniband.h> @@ -73,6 +73,7 @@ #include <net/addrconf.h> #include <net/tcp.h> #include <net/ip.h> +#include <net/netlink.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> @@ -118,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_RWLOCK(addrconf_hash_lock); -/* Protects inet6 devices */ -DEFINE_RWLOCK(addrconf_lock); - static void addrconf_verify(unsigned long); static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); @@ -145,7 +143,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf = { +struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -174,9 +172,10 @@ struct ipv6_devconf ipv6_devconf = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; -static struct ipv6_devconf ipv6_devconf_dflt = { +static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -204,6 +203,7 @@ static struct ipv6_devconf ipv6_devconf_dflt = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -315,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, /* Nobody refers to this device, we may destroy it. */ +static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); + kfree(idev); +} + void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; @@ -329,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) return; } snmp6_free_dev(idev); - kfree(idev); + call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } static struct inet6_dev * ipv6_add_dev(struct net_device *dev) @@ -405,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = ndev; - write_unlock_bh(&addrconf_lock); + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, ndev); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -471,7 +476,7 @@ static void addrconf_forward_change(void) read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); @@ -479,7 +484,7 @@ static void addrconf_forward_change(void) if (changed) dev_forward_change(idev); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); } read_unlock(&dev_base_lock); } @@ -509,6 +514,26 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) kfree(ifp); } +static void +ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ifa, **ifap; + int ifp_scope = ipv6_addr_src_scope(&ifp->addr); + + /* + * Each device address list is sorted in order of scope - + * global before linklocal. + */ + for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; + ifap = &ifa->if_next) { + if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) + break; + } + + ifp->if_next = *ifap; + *ifap = ifp; +} + /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * @@ -520,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, int hash; int err = 0; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -559,6 +584,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->flags = flags | IFA_F_TENTATIVE; ifa->cstamp = ifa->tstamp = jiffies; + ifa->rt = rt; + ifa->idev = idev; in6_dev_hold(idev); /* For caller */ @@ -574,8 +601,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ - ifa->if_next = idev->addr_list; - idev->addr_list = ifa; + ipv6_link_dev_addr(idev, ifa); #ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { @@ -585,12 +611,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } #endif - ifa->rt = rt; - in6_ifa_hold(ifa); write_unlock(&idev->lock); out2: - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); if (likely(err == 0)) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); @@ -716,7 +740,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { rt->rt6i_expires = expires; @@ -893,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, memset(&hiscore, 0, sizeof(hiscore)); read_lock(&dev_base_lock); - read_lock(&addrconf_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev=dev->next) { struct inet6_dev *idev; @@ -988,7 +1012,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } else if (score.scope < hiscore.scope) { if (score.scope < daddr_scope) - continue; + break; /* addresses sorted by scope */ else { score.rule = 2; goto record_it; @@ -1014,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - /* Rule 4: Prefer home address -- not implemented yet */ + /* Rule 4: Prefer home address */ +#ifdef CONFIG_IPV6_MIP6 + if (hiscore.rule < 4) { + if (ifa_result->flags & IFA_F_HOMEADDRESS) + hiscore.attrs |= IPV6_SADDR_SCORE_HOA; + hiscore.rule++; + } + if (ifa->flags & IFA_F_HOMEADDRESS) { + score.attrs |= IPV6_SADDR_SCORE_HOA; + if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { + score.rule = 4; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) + continue; + } +#else if (hiscore.rule < 4) hiscore.rule++; +#endif /* Rule 5: Prefer outgoing interface */ if (hiscore.rule < 5) { @@ -1105,7 +1147,7 @@ record_it: } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); read_unlock(&dev_base_lock); if (!ifa_result) @@ -1129,7 +1171,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) struct inet6_dev *idev; int err = -EADDRNOTAVAIL; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(dev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1143,7 +1185,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return err; } @@ -1216,8 +1258,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -1444,7 +1486,7 @@ static void ipv6_regen_rndid(unsigned long data) struct inet6_dev *idev = (struct inet6_dev *) data; unsigned long expires; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); write_lock_bh(&idev->lock); if (idev->dead) @@ -1468,7 +1510,7 @@ static void ipv6_regen_rndid(unsigned long data) out: write_unlock_bh(&idev->lock); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); in6_dev_put(idev); } @@ -1489,59 +1531,56 @@ static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_PREFIX, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_expires = expires, + .fc_dst_len = plen, + .fc_flags = RTF_UP | flags, + }; - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx); - rtmsg.rtmsg_dst_len = plen; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_info = expires; - rtmsg.rtmsg_flags = RTF_UP|flags; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&cfg.fc_dst, pfx); /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ - if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) - rtmsg.rtmsg_flags |= RTF_NONEXTHOP; + if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) + cfg.fc_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&cfg); } /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_LOCAL, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 8, + .fc_flags = RTF_UP, + }; + + ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_set(&rtmsg.rtmsg_dst, - htonl(0xFF000000), 0, 0, 0); - rtmsg.rtmsg_dst_len = 8; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_flags = RTF_UP; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&cfg); } static void sit_route_add(struct net_device *dev) { - struct in6_rtmsg rtmsg; - - memset(&rtmsg, 0, sizeof(rtmsg)); - - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_MAIN, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 96, + .fc_flags = RTF_UP | RTF_NONEXTHOP, + }; /* prefix length - 96 bits "::d.d.d.d" */ - rtmsg.rtmsg_dst_len = 96; - rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; - rtmsg.rtmsg_ifindex = dev->ifindex; - - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&cfg); } static void addrconf_add_lroute(struct net_device *dev) @@ -1642,7 +1681,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (valid_lft == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else { rt->rt6i_expires = jiffies + rt_expires; @@ -1851,7 +1890,8 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -1860,21 +1900,41 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - if ((idev = addrconf_add_dev(dev)) == NULL) return -ENOBUFS; scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock_bh(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock_bh(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1927,7 +1987,8 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2300,10 +2361,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) Do not dev_put! */ if (how == 1) { - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = NULL; idev->dead = 1; - write_unlock_bh(&addrconf_lock); + + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -2470,7 +2531,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - !(ifp->flags&IFA_F_TENTATIVE)) { + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); read_unlock_bh(&idev->lock); @@ -2715,6 +2777,26 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_IPV6_MIP6 +/* Check if address is a home address configured on any interface. */ +int ipv6_chk_home_addr(struct in6_addr *addr) +{ + int ret = 0; + struct inet6_ifaddr * ifp; + u8 hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); + for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + (ifp->flags & IFA_F_HOMEADDRESS)) { + ret = 1; + break; + } + } + read_unlock_bh(&addrconf_hash_lock); + return ret; +} +#endif + /* * Periodic address status verification */ @@ -2753,12 +2835,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; @@ -2821,178 +2907,267 @@ restart: spin_unlock_bh(&addrconf_verify_lock); } +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +{ + struct in6_addr *pfx = NULL; + + if (addr) + pfx = nla_data(addr); + + if (local) { + if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) + pfx = NULL; + else + pfx = nla_data(local); + } + + return pfx; +} + +static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { + [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, + [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, +}; + static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; + int err; - pfx = NULL; - if (rta[IFA_ADDRESS-1]) { - if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_ADDRESS-1]); - } - if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_LOCAL-1]); - } + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); if (pfx == NULL) return -EINVAL; return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, + u32 prefered_lft, u32 valid_lft) +{ + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + + addrconf_verify(0); + + return 0; +} + static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; + struct inet6_ifaddr *ifa; + struct net_device *dev; + u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; + u8 ifa_flags; + int err; - pfx = NULL; - if (rta[IFA_ADDRESS-1]) { - if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_ADDRESS-1]); - } - if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_LOCAL-1]); - } + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + valid_lft = ci->ifa_valid; + preferred_lft = ci->ifa_prefered; + } else { + preferred_lft = INFINITY_LIFE_TIME; + valid_lft = INFINITY_LIFE_TIME; + } + + dev = __dev_get_by_index(ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + /* We ignore other flags so far. */ + ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); + + ifa = ipv6_get_ifaddr(pfx, dev, 1); + if (ifa == NULL) { + /* + * It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + ifa_flags, preferred_lft, valid_lft); + } + + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + err = -EEXIST; + else + err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); + + in6_ifa_put(ifa); + + return err; +} + +static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, + u8 scope, int ifindex) +{ + struct ifaddrmsg *ifm; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = prefixlen; + ifm->ifa_flags = flags; + ifm->ifa_scope = scope; + ifm->ifa_index = ifindex; +} + +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } -/* Maximum length of ifa_cacheinfo attributes */ -#define INET6_IFADDR_RTA_SPACE \ - RTA_SPACE(16) /* IFA_ADDRESS */ + \ - RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ +static inline int rt_scope(int ifa_scope) +{ + if (ifa_scope & IFA_HOST) + return RT_SCOPE_HOST; + else if (ifa_scope & IFA_LINK) + return RT_SCOPE_LINK; + else if (ifa_scope & IFA_SITE) + return RT_SCOPE_SITE; + else + return RT_SCOPE_UNIVERSE; +} + +static inline int inet6_ifaddr_msgsize(void) +{ + return nlmsg_total_size(sizeof(struct ifaddrmsg) + + nla_total_size(16) + + nla_total_size(sizeof(struct ifa_cacheinfo)) + + 128); +} static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; - unsigned char *b = skb->tail; + u32 preferred, valid; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), + ifa->idev->dev->ifindex); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = ifa->prefix_len; - ifm->ifa_flags = ifa->flags; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ifa->scope&IFA_HOST) - ifm->ifa_scope = RT_SCOPE_HOST; - else if (ifa->scope&IFA_LINK) - ifm->ifa_scope = RT_SCOPE_LINK; - else if (ifa->scope&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifa->idev->dev->ifindex; - RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); if (!(ifa->flags&IFA_F_PERMANENT)) { - ci.ifa_prefered = ifa->prefered_lft; - ci.ifa_valid = ifa->valid_lft; - if (ci.ifa_prefered != INFINITY_LIFE_TIME) { + preferred = ifa->prefered_lft; + valid = ifa->valid_lft; + if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - ci.ifa_prefered -= tval; - if (ci.ifa_valid != INFINITY_LIFE_TIME) - ci.ifa_valid -= tval; + preferred -= tval; + if (valid != INFINITY_LIFE_TIME) + valid -= tval; } } else { - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; - } - ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; + } -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || + put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + return nlmsg_cancel(skb, nlh); + + return nlmsg_end(skb, nlh); } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, u32 pid, u32 seq, int event, u16 flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = 128; - ifm->ifa_flags = IFA_F_PERMANENT; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifmca->idev->dev->ifindex; - RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); - ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + u8 scope = RT_SCOPE_UNIVERSE; + int ifindex = ifmca->idev->dev->ifindex; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) + scope = RT_SCOPE_SITE; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); + if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) + return nlmsg_cancel(skb, nlh); + + return nlmsg_end(skb, nlh); } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 pid, u32 seq, int event, unsigned int flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = 128; - ifm->ifa_flags = IFA_F_PERMANENT; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifaca->aca_idev->dev->ifindex; - RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); - ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + u8 scope = RT_SCOPE_UNIVERSE; + int ifindex = ifaca->aca_idev->dev->ifindex; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) + scope = RT_SCOPE_SITE; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); + if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) + return nlmsg_cancel(skb, nlh); + + return nlmsg_end(skb, nlh); } enum addr_type_t @@ -3103,23 +3278,74 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) { + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); - return; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + goto errout; + + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + if (addr == NULL) { + err = -EINVAL; + goto errout; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + err = -EADDRNOTAVAIL; + goto errout; } - if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { + + if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto errout_ifa; + } + + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); - return; + goto errout_ifa; + } + + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +errout_ifa: + in6_ifa_put(ifa); +errout: + return err; +} + +static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); + if (err < 0) { + kfree_skb(skb); + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3154,6 +3380,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; } /* Maximum length of ifinfomsg attributes */ @@ -3260,20 +3487,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); + int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); - return; - } - if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); } /* Maximum length of prefix_cacheinfo attributes */ @@ -3325,33 +3555,40 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); + int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); - return; - } - if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, +#endif }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -3360,7 +3597,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: - ip6_ins_rt(ifp->rt, NULL, NULL, NULL); + ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); break; @@ -3369,7 +3606,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } @@ -3377,10 +3614,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); } #ifdef CONFIG_SYSCTL @@ -3477,7 +3714,7 @@ static struct addrconf_sysctl_table ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; ctl_table addrconf_root_dir[2]; -} addrconf_sysctl = { +} addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { { @@ -3667,6 +3904,14 @@ static struct addrconf_sysctl_table #endif #endif { + .ctl_name = NET_IPV6_PROXY_NDP, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e19457fe4f6e..e94eccb99707 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -23,7 +23,6 @@ #include <linux/module.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -60,6 +59,9 @@ #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif +#ifdef CONFIG_IPV6_MIP6 +#include <net/mip6.h> +#endif #include <asm/uaccess.h> #include <asm/system.h> @@ -68,7 +70,7 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -int sysctl_ipv6_bindv6only; +int sysctl_ipv6_bindv6only __read_mostly; /* The inetsw table contains everything that inet_create needs to * build a new socket. @@ -244,7 +246,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - __u32 v4addr = 0; + __be32 v4addr = 0; unsigned short snum; int addr_type = 0; int err = 0; @@ -638,6 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -659,9 +662,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + __ip6_dst_store(sk, dst, NULL, NULL); } return 0; @@ -760,6 +761,8 @@ static int __init inet6_init(void) struct list_head *r; int err; + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + #ifdef MODULE #if 0 /* FIXME --RR */ if (!mod_member_present(&__this_module, can_unload)) @@ -769,11 +772,6 @@ static int __init inet6_init(void) #endif #endif - if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "inet6_proto_init: size fault\n"); - return -EINVAL; - } - err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -859,6 +857,9 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); +#ifdef CONFIG_IPV6_MIP6 + mip6_init(); +#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -922,6 +923,9 @@ static void __exit inet6_exit(void) tcp6_proc_exit(); raw6_proc_exit(); #endif +#ifdef CONFIG_IPV6_MIP6 + mip6_fini(); +#endif /* Cleanup code parts. */ sit_cleanup(); ip6_flowlabel_cleanup(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index d31c0d6c0448..b0d83e8e4252 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -24,7 +24,6 @@ * This file is derived from net/ipv4/ah.c. */ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/ah.h> @@ -75,6 +74,66 @@ bad: return 0; } +#ifdef CONFIG_IPV6_MIP6 +/** + * ipv6_rearrange_destopt - rearrange IPv6 destination options header + * @iph: IPv6 header + * @destopt: destionation options header + */ +static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) +{ + u8 *opt = (u8 *)destopt; + int len = ipv6_optlen(destopt); + int off = 0; + int optlen = 0; + + off += 2; + len -= 2; + + while (len > 0) { + + switch (opt[off]) { + + case IPV6_TLV_PAD0: + optlen = 1; + break; + default: + if (len < 2) + goto bad; + optlen = opt[off+1]+2; + if (len < optlen) + goto bad; + + /* Rearrange the source address in @iph and the + * addresses in home address option for final source. + * See 11.3.2 of RFC 3775 for details. + */ + if (opt[off] == IPV6_TLV_HAO) { + struct in6_addr final_addr; + struct ipv6_destopt_hao *hao; + + hao = (struct ipv6_destopt_hao *)&opt[off]; + if (hao->length != sizeof(hao->addr)) { + if (net_ratelimit()) + printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); + goto bad; + } + ipv6_addr_copy(&final_addr, &hao->addr); + ipv6_addr_copy(&hao->addr, &iph->saddr); + ipv6_addr_copy(&iph->saddr, &final_addr); + } + break; + } + + off += optlen; + len -= optlen; + } + /* Note: ok if len == 0 */ +bad: + return; +} +#endif + /** * ipv6_rearrange_rthdr - rearrange IPv6 routing header * @iph: IPv6 header @@ -114,7 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) ipv6_addr_copy(&iph->daddr, &final_addr); } -static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) +static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) { union { struct ipv6hdr *iph; @@ -129,8 +188,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) while (exthdr.raw < end) { switch (nexthdr) { - case NEXTHDR_HOP: case NEXTHDR_DEST: +#ifdef CONFIG_IPV6_MIP6 + if (dir == XFRM_POLICY_OUT) + ipv6_rearrange_destopt(iph, exthdr.opth); +#endif + case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( KERN_WARNING "overrun %sopts\n", @@ -165,6 +228,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; char tmp_base[8]; struct { +#ifdef CONFIG_IPV6_MIP6 + struct in6_addr saddr; +#endif struct in6_addr daddr; char hdrs[0]; } *tmp_ext; @@ -189,10 +255,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) err = -ENOMEM; goto error; } +#ifdef CONFIG_IPV6_MIP6 + memcpy(tmp_ext, &top_iph->saddr, extlen); +#else memcpy(tmp_ext, &top_iph->daddr, extlen); +#endif err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + - sizeof(*top_iph)); + sizeof(*top_iph), + XFRM_POLICY_OUT); if (err) goto error_free_iph; } @@ -214,13 +285,20 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error_free_iph; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); err = 0; memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { +#ifdef CONFIG_IPV6_MIP6 + memcpy(&top_iph->saddr, tmp_ext, extlen); +#else memcpy(&top_iph->daddr, tmp_ext, extlen); +#endif error_free_iph: kfree(tmp_ext); } @@ -252,6 +330,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) u16 hdr_len; u16 ah_hlen; int nexthdr; + int err = -EINVAL; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -279,7 +358,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!tmp_hdr) goto out; memcpy(tmp_hdr, skb->nh.raw, hdr_len); - if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) + if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) goto free_out; skb->nh.ipv6h->priority = 0; skb->nh.ipv6h->flow_lbl[0] = 0; @@ -293,8 +372,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); skb_push(skb, hdr_len); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto free_out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); x->stats.integrity_failed++; goto free_out; @@ -311,7 +393,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) free_out: kfree(tmp_hdr); out: - return -EINVAL; + return err; } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -339,6 +421,7 @@ static int ah6_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -356,24 +439,27 @@ static int ah6_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -388,7 +474,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; @@ -397,7 +483,7 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -412,7 +498,7 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } @@ -425,7 +511,8 @@ static struct xfrm_type ah6_type = .init_state = ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, - .output = ah6_output + .output = ah6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ah6_protocol = { diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 39ec528923f6..a9604764e015 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -14,7 +14,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> @@ -57,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) int onlink; onlink = 0; - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); @@ -69,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return onlink; } @@ -336,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) write_unlock_bh(&idev->lock); dst_hold(&rt->u.dst); - if (ip6_ins_rt(rt, NULL, NULL, NULL)) + if (ip6_ins_rt(rt)) dst_release(&rt->u.dst); addrconf_join_solict(dev, &aca->aca_addr); @@ -379,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->u.dst); - if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL)) + if (ip6_del_rt(aca->aca_rt)) dst_free(&aca->aca_rt->u.dst); else dst_release(&aca->aca_rt->u.dst); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 99a6eb23378b..7206747022fc 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -156,6 +156,8 @@ ipv4_connected: if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); + if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; @@ -191,7 +193,12 @@ ipv4_connected: ip6_dst_store(sk, dst, ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); sk->sk_state = TCP_ESTABLISHED; out: @@ -641,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); - /* - * TYPE 0 - */ - if (rthdr->type) { + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: +#endif + break; + default: err = -EINVAL; goto exit_f; } @@ -696,7 +706,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } tc = *(int *)CMSG_DATA(cmsg); - if (tc < 0 || tc > 0xff) + if (tc < -1 || tc > 0xff) goto exit_f; err = 0; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a15a6f320f70..e78680a9985b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,7 +24,7 @@ * This file is derived from net/ipv4/esp.c */ -#include <linux/config.h> +#include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -45,7 +45,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; struct ipv6hdr *top_iph; struct ipv6_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -68,7 +69,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -96,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -108,24 +116,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } - err = 0; - error: return err; } @@ -135,8 +144,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *iph; struct ipv6_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; @@ -156,15 +167,16 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; + u8 sum[alen]; - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + ret = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (ret) + goto out; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; ret = -EINVAL; goto out; @@ -183,7 +195,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); { u8 nexthdr[2]; @@ -198,9 +210,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } } skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(ret)) + goto out; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -226,9 +240,9 @@ out: static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -267,11 +281,11 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -281,6 +295,7 @@ static void esp6_destroy(struct xfrm_state *x) static int esp6_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -299,24 +314,29 @@ static int esp6_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { - printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + crypto_hash_digestsize(hash)) { + NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_hash_digestsize(hash), + aalg_desc->uinfo.auth.icv_fullbits/8); + goto error; } esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; @@ -328,24 +348,22 @@ static int esp6_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; @@ -366,7 +384,8 @@ static struct xfrm_type esp6_type = .destructor = esp6_destroy, .get_max_size = esp6_get_max_size, .input = esp6_input, - .output = esp6_output + .output = esp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol esp6_protocol = { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a18d4256372c..88c96b10684c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,9 +43,54 @@ #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#ifdef CONFIG_IPV6_MIP6 +#include <net/xfrm.h> +#endif #include <asm/uaccess.h> +int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +{ + int packet_len = skb->tail - skb->nh.raw; + struct ipv6_opt_hdr *hdr; + int len; + + if (offset + 2 > packet_len) + goto bad; + hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + len = ((hdr->hdrlen + 1) << 3); + + if (offset + len > packet_len) + goto bad; + + offset += 2; + len -= 2; + + while (len > 0) { + int opttype = skb->nh.raw[offset]; + int optlen; + + if (opttype == type) + return offset; + + switch (opttype) { + case IPV6_TLV_PAD0: + optlen = 1; + break; + default: + optlen = skb->nh.raw[offset + 1] + 2; + if (optlen > len) + goto bad; + break; + } + offset += optlen; + len -= optlen; + } + /* not_found */ + bad: + return -1; +} + /* * Parsing tlv encoded headers. * @@ -56,7 +101,7 @@ struct tlvtype_proc { int type; - int (*func)(struct sk_buff *skb, int offset); + int (*func)(struct sk_buff **skbp, int offset); }; /********************* @@ -65,8 +110,10 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -91,8 +138,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; int off = skb->h.raw - skb->nh.raw; int len = ((skb->h.raw[1]+1)<<3); @@ -122,13 +170,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skb, off) == 0) + if (curr->func(skbp, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) + if (ip6_tlvopt_unknown(skbp, off) == 0) return 0; } break; @@ -147,8 +195,85 @@ bad: Destination options header. *****************************/ +#ifdef CONFIG_IPV6_MIP6 +static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +{ + struct sk_buff *skb = *skbp; + struct ipv6_destopt_hao *hao; + struct inet6_skb_parm *opt = IP6CB(skb); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw; + struct in6_addr tmp_addr; + int ret; + + if (opt->dsthao) { + LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + goto discard; + } + opt->dsthao = opt->dst1; + opt->dst1 = 0; + + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff); + + if (hao->length != 16) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao invalid option length = %d\n", hao->length); + goto discard; + } + + if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + goto discard; + } + + ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, + (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); + if (unlikely(ret < 0)) + goto discard; + + if (skb_cloned(skb)) { + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + struct inet6_skb_parm *opt2; + + if (skb2 == NULL) + goto discard; + + opt2 = IP6CB(skb2); + memcpy(opt2, opt, sizeof(*opt2)); + + kfree_skb(skb); + + /* update all variable using below by copied skbuff */ + *skbp = skb = skb2; + hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff); + ipv6h = (struct ipv6hdr *)skb2->nh.raw; + } + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + + ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); + ipv6_addr_copy(&ipv6h->saddr, &hao->addr); + ipv6_addr_copy(&hao->addr, &tmp_addr); + + if (skb->tstamp.off_sec == 0) + __net_timestamp(skb); + + return 1; + + discard: + kfree_skb(skb); + return 0; +} +#endif + static struct tlvtype_proc tlvprocdestopt_lst[] = { - /* No destination options are defined now */ +#ifdef CONFIG_IPV6_MIP6 + { + .type = IPV6_TLV_HAO, + .func = ipv6_dest_hao, + }, +#endif {-1, NULL} }; @@ -156,6 +281,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); +#ifdef CONFIG_IPV6_MIP6 + __u16 dstbuf; +#endif if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { @@ -166,10 +294,19 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; +#ifdef CONFIG_IPV6_MIP6 + dstbuf = opt->dst1; +#endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); + opt = IP6CB(skb); +#ifdef CONFIG_IPV6_MIP6 + opt->nhoff = dstbuf; +#else opt->nhoff = opt->dst1; +#endif return 1; } @@ -179,7 +316,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) static struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_destopt_init(void) @@ -219,7 +356,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); - struct in6_addr *addr; + struct in6_addr *addr = NULL; struct in6_addr daddr; int n, i; @@ -244,6 +381,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard type 2 header unless it was + * processed by own + */ + if (!addr) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; @@ -253,17 +407,29 @@ looped_back: return 1; } - if (hdr->type != IPV6_SRCRT_TYPE_0) { + switch (hdr->type) { + case IPV6_SRCRT_TYPE_0: + if (hdr->hdrlen & 0x01) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); + return -1; + } + break; +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard invalid RTH type 2 */ + if (hdr->hdrlen != 2 || hdr->segments_left != 1) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } - - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); - return -1; - } /* * This is the routing header forwarding algorithm from @@ -294,7 +460,7 @@ looped_back: hdr = (struct ipv6_rt_hdr *) skb2->h.raw; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; @@ -303,6 +469,27 @@ looped_back: addr = rthdr->addr; addr += i - 1; + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, + (xfrm_address_t *)&skb->nh.ipv6h->saddr, + IPPROTO_ROUTING) < 0) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + if (!ipv6_chk_home_addr(addr)) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + if (ipv6_addr_is_multicast(addr)) { IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); @@ -340,7 +527,7 @@ looped_back: static struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_rthdr_init(void) @@ -421,8 +608,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff *skb, int optoff) +static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + if (skb->nh.raw[optoff+1] == 2) { IP6CB(skb)->ra = optoff; return 1; @@ -435,8 +624,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) +static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { @@ -485,8 +675,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff *skb) +int ipv6_parse_hopopts(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -502,8 +693,10 @@ int ipv6_parse_hopopts(struct sk_buff *skb) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { + skb = *skbp; skb->h.raw += (skb->h.raw[1]+1)<<3; + opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; } @@ -635,14 +828,17 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, struct ipv6_txoptions *opt2; int err; - if (newtype != IPV6_HOPOPTS && opt->hopopt) - tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); - if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) - tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); - if (newtype != IPV6_RTHDR && opt->srcrt) - tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); - if (newtype != IPV6_DSTOPTS && opt->dst1opt) - tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + if (opt) { + if (newtype != IPV6_HOPOPTS && opt->hopopt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); + if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); + if (newtype != IPV6_RTHDR && opt->srcrt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); + if (newtype != IPV6_DSTOPTS && opt->dst1opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + } + if (newopt && newoptlen) tot_len += CMSG_ALIGN(newoptlen); @@ -659,25 +855,25 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, opt2->tot_len = tot_len; p = (char *)(opt2 + 1); - err = ipv6_renew_option(opt->hopopt, newopt, newoptlen, + err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen, newtype != IPV6_HOPOPTS, &opt2->hopopt, &p); if (err) goto out; - err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen, + err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen, newtype != IPV6_RTHDRDSTOPTS, &opt2->dst0opt, &p); if (err) goto out; - err = ipv6_renew_option(opt->srcrt, newopt, newoptlen, + err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen, newtype != IPV6_RTHDR, - (struct ipv6_opt_hdr **)opt2->srcrt, &p); + (struct ipv6_opt_hdr **)&opt2->srcrt, &p); if (err) goto out; - err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen, + err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen, newtype != IPV6_DSTOPTS, &opt2->dst1opt, &p); if (err) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c new file mode 100644 index 000000000000..34f5bfaddfc2 --- /dev/null +++ b/net/ipv6/fib6_rules.c @@ -0,0 +1,305 @@ +/* + * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors + * Thomas Graf <tgraf@suug.ch> + * Ville Nuorvala <vnuorval@tcs.hut.fi> + */ + +#include <linux/config.h> +#include <linux/netdevice.h> + +#include <net/fib_rules.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/netlink.h> + +struct fib6_rule +{ + struct fib_rule common; + struct rt6key src; + struct rt6key dst; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + u32 fwmark; + u32 fwmask; +#endif + u8 tclass; +}; + +static struct fib_rules_ops fib6_rules_ops; + +static struct fib6_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_MAIN, + }, +}; + +static struct fib6_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_LOCAL, + .flags = FIB_RULE_PERMANENT, + }, +}; + +static LIST_HEAD(fib6_rules); + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + }; + + fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + if (arg.rule) + fib_rule_put(arg.rule); + + if (arg.result) + return (struct dst_entry *) arg.result; + + dst_hold(&ip6_null_entry.u.dst); + return &ip6_null_entry.u.dst; +} + +static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct rt6_info *rt = NULL; + struct fib6_table *table; + pol_lookup_t lookup = arg->lookup_ptr; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + rt = &ip6_null_entry; + goto discard_pkt; + default: + case FR_ACT_BLACKHOLE: + rt = &ip6_blk_hole_entry; + goto discard_pkt; + case FR_ACT_PROHIBIT: + rt = &ip6_prohibit_entry; + goto discard_pkt; + } + + table = fib6_get_table(rule->table); + if (table) + rt = lookup(table, flp, flags); + + if (rt != &ip6_null_entry) + goto out; + dst_release(&rt->u.dst); + rt = NULL; + goto out; + +discard_pkt: + dst_hold(&rt->u.dst); +out: + arg->result = rt; + return rt == NULL ? -EAGAIN : 0; +} + + +static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib6_rule *r = (struct fib6_rule *) rule; + + if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + return 0; + + if ((flags & RT6_LOOKUP_F_HAS_SADDR) && + !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) + return 0; + + if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) + return 0; + +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask) + return 0; +#endif + + return 1; +} + +static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .len = sizeof(struct in6_addr) }, + [FRA_DST] = { .len = sizeof(struct in6_addr) }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, +}; + +static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len > 128 || frh->dst_len > 128 || + (frh->tos & ~IPV6_FLOWINFO_MASK)) + goto errout; + + if (rule->action == FR_ACT_TO_TBL) { + if (rule->table == RT6_TABLE_UNSPEC) + goto errout; + + if (fib6_new_table(rule->table) == NULL) { + err = -ENOBUFS; + goto errout; + } + } + + if (tb[FRA_SRC]) + nla_memcpy(&rule6->src.addr, tb[FRA_SRC], + sizeof(struct in6_addr)); + + if (tb[FRA_DST]) + nla_memcpy(&rule6->dst.addr, tb[FRA_DST], + sizeof(struct in6_addr)); + +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK]) { + rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule6->fwmark) { + /* + * if the mark value is non-zero, + * all bits are compared by default + * unless a mask is explicitly specified. + */ + rule6->fwmask = 0xFFFFFFFF; + } + } + + if (tb[FRA_FWMASK]) + rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]); +#endif + + rule6->src.plen = frh->src_len; + rule6->dst.plen = frh->dst_len; + rule6->tclass = frh->tos; + + err = 0; +errout: + return err; +} + +static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len && (rule6->src.plen != frh->src_len)) + return 0; + + if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) + return 0; + + if (frh->tos && (rule6->tclass != frh->tos)) + return 0; + + if (tb[FRA_SRC] && + nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) + return 0; + + if (tb[FRA_DST] && + nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) + return 0; + +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; + + if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; +#endif + + return 1; +} + +static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + frh->family = AF_INET6; + frh->dst_len = rule6->dst.plen; + frh->src_len = rule6->src.plen; + frh->tos = rule6->tclass; + + if (rule6->dst.plen) + NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), + &rule6->dst.addr); + + if (rule6->src.plen) + NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), + &rule6->src.addr); + +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (rule6->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); + + if (rule6->fwmask || rule6->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); +#endif + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET6); +} + +static u32 fib6_rule_default_pref(void) +{ + return 0x3FFF; +} + +static struct fib_rules_ops fib6_rules_ops = { + .family = AF_INET6, + .rule_size = sizeof(struct fib6_rule), + .action = fib6_rule_action, + .match = fib6_rule_match, + .configure = fib6_rule_configure, + .compare = fib6_rule_compare, + .fill = fib6_rule_fill, + .default_pref = fib6_rule_default_pref, + .nlgroup = RTNLGRP_IPV6_RULE, + .policy = fib6_rule_policy, + .rules_list = &fib6_rules, + .owner = THIS_MODULE, +}; + +void __init fib6_rules_init(void) +{ + list_add_tail(&local_rule.common.list, &fib6_rules); + list_add_tail(&main_rule.common.list, &fib6_rules); + + fib_rules_register(&fib6_rules_ops); +} + +void fib6_rules_cleanup(void) +{ + fib_rules_unregister(&fib6_rules_ops); +} diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1044b6fce0d5..4ec876066b3f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb) return 0; } -static int sysctl_icmpv6_time = 1*HZ; +static int sysctl_icmpv6_time __read_mostly = 1*HZ; /* * Check the ICMP output rate limit @@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } +#ifdef CONFIG_IPV6_MIP6 +static void mip6_addr_swap(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct inet6_skb_parm *opt = IP6CB(skb); + struct ipv6_destopt_hao *hao; + struct in6_addr tmp; + int off; + + if (opt->dsthao) { + off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); + if (likely(off >= 0)) { + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off); + ipv6_addr_copy(&tmp, &iph->saddr); + ipv6_addr_copy(&iph->saddr, &hao->addr); + ipv6_addr_copy(&hao->addr, &tmp); + } + } +} +#else +static inline void mip6_addr_swap(struct sk_buff *skb) {} +#endif + /* * Send an ICMP message in response to a packet in error */ @@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, return; } + mip6_addr_swap(skb); + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); @@ -358,6 +383,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -401,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; @@ -472,6 +498,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -497,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; @@ -604,7 +631,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) /* Perform checksum. */ switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) break; @@ -712,6 +739,11 @@ discard_it: return 0; } +/* + * Special lock-class for __icmpv6_socket: + */ +static struct lock_class_key icmpv6_socket_sk_dst_lock_key; + int __init icmpv6_init(struct net_proto_family *ops) { struct sock *sk; @@ -730,6 +762,14 @@ int __init icmpv6_init(struct net_proto_family *ops) sk = per_cpu(__icmpv6_socket, i)->sk; sk->sk_allocation = GFP_ATOMIC; + /* + * Split off their lock-class, because sk->sk_dst_lock + * gets used from softirqs, which is safe for + * __icmpv6_socket (because those never get directly used + * via userspace syscalls), but unsafe for normal sockets. + */ + lockdep_set_class(&sk->sk_dst_lock, + &icmpv6_socket_sk_dst_lock_key); /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index eb2865d5ae28..827f41d1478b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -13,7 +13,6 @@ * 2 of the License, or(at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/in6.h> #include <linux/ipv6.h> @@ -158,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -186,9 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + __ip6_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2ae84c961678..8accd1fbeeda 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> @@ -65,7 +64,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, { struct sock *sk; const struct hlist_node *node; - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -83,7 +82,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); @@ -172,7 +171,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &np->rcv_saddr; const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -189,7 +188,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = inet_twsk(sk2); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2cb6149349bf..8fcae7a6510b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -18,8 +18,8 @@ * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> @@ -27,6 +27,7 @@ #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/init.h> +#include <linux/list.h> #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> @@ -69,19 +70,19 @@ struct fib6_cleaner_t void *arg; }; -DEFINE_RWLOCK(fib6_walker_lock); - +static DEFINE_RWLOCK(fib6_walker_lock); #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S -#define SUBTREE(fn) ((fn)->subtree) #else #define FWS_INIT FWS_L -#define SUBTREE(fn) NULL #endif static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); +static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +static int fib6_walk(struct fib6_walker_t *w); +static int fib6_walk_continue(struct fib6_walker_t *w); /* * A routing update causes an increase of the serial number on the @@ -94,13 +95,31 @@ static __u32 rt_sernum; static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); -struct fib6_walker_t fib6_walker_list = { +static struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, .next = &fib6_walker_list, }; #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static inline void fib6_walker_link(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next = fib6_walker_list.next; + w->prev = &fib6_walker_list; + w->next->prev = w; + w->prev->next = w; + write_unlock_bh(&fib6_walker_lock); +} + +static inline void fib6_walker_unlink(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next->prev = w->prev; + w->prev->next = w->next; + w->prev = w->next = w; + write_unlock_bh(&fib6_walker_lock); +} static __inline__ u32 fib6_new_sernum(void) { u32 n = ++rt_sernum; @@ -148,6 +167,253 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } +static struct fib6_table fib6_main_tbl = { + .tb6_id = RT6_TABLE_MAIN, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB_TABLE_HASHSZ 256 +#else +#define FIB_TABLE_HASHSZ 1 +#endif +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + +static void fib6_link_table(struct fib6_table *tb) +{ + unsigned int h; + + h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + + /* + * No protection necessary, this is the only list mutatation + * operation, tables never disappear once they exist. + */ + hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); +} + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +static struct fib6_table fib6_local_tbl = { + .tb6_id = RT6_TABLE_LOCAL, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + +static struct fib6_table *fib6_alloc_table(u32 id) +{ + struct fib6_table *table; + + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (table != NULL) { + table->tb6_id = id; + table->tb6_lock = RW_LOCK_UNLOCKED; + table->tb6_root.leaf = &ip6_null_entry; + table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + } + + return table; +} + +struct fib6_table *fib6_new_table(u32 id) +{ + struct fib6_table *tb; + + if (id == 0) + id = RT6_TABLE_MAIN; + tb = fib6_get_table(id); + if (tb) + return tb; + + tb = fib6_alloc_table(id); + if (tb != NULL) + fib6_link_table(tb); + + return tb; +} + +struct fib6_table *fib6_get_table(u32 id) +{ + struct fib6_table *tb; + struct hlist_node *node; + unsigned int h; + + if (id == 0) + id = RT6_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { + if (tb->tb6_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + + return NULL; +} + +static void __init fib6_tables_init(void) +{ + fib6_link_table(&fib6_main_tbl); + fib6_link_table(&fib6_local_tbl); +} + +#else + +struct fib6_table *fib6_new_table(u32 id) +{ + return fib6_get_table(id); +} + +struct fib6_table *fib6_get_table(u32 id) +{ + return &fib6_main_tbl; +} + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); +} + +static void __init fib6_tables_init(void) +{ + fib6_link_table(&fib6_main_tbl); +} + +#endif + +static int fib6_dump_node(struct fib6_walker_t *w) +{ + int res; + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->u.next) { + res = rt6_dump_route(rt, w->args); + if (res < 0) { + /* Frame is full, suspend walking */ + w->leaf = rt; + return 1; + } + BUG_TRAP(res!=0); + } + w->leaf = NULL; + return 0; +} + +static void fib6_dump_end(struct netlink_callback *cb) +{ + struct fib6_walker_t *w = (void*)cb->args[2]; + + if (w) { + cb->args[2] = 0; + kfree(w); + } + cb->done = (void*)cb->args[3]; + cb->args[1] = 3; +} + +static int fib6_dump_done(struct netlink_callback *cb) +{ + fib6_dump_end(cb); + return cb->done ? cb->done(cb) : 0; +} + +static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct fib6_walker_t *w; + int res; + + w = (void *)cb->args[2]; + w->root = &table->tb6_root; + + if (cb->args[4] == 0) { + read_lock_bh(&table->tb6_lock); + res = fib6_walk(w); + read_unlock_bh(&table->tb6_lock); + if (res > 0) + cb->args[4] = 1; + } else { + read_lock_bh(&table->tb6_lock); + res = fib6_walk_continue(w); + read_unlock_bh(&table->tb6_lock); + if (res != 0) { + if (res < 0) + fib6_walker_unlink(w); + goto end; + } + fib6_walker_unlink(w); + cb->args[4] = 0; + } +end: + return res; +} + +int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct rt6_rtnl_dump_arg arg; + struct fib6_walker_t *w; + struct fib6_table *tb; + struct hlist_node *node; + int res = 0; + + s_h = cb->args[0]; + s_e = cb->args[1]; + + w = (void *)cb->args[2]; + if (w == NULL) { + /* New dump: + * + * 1. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + + /* + * 2. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (w == NULL) + return -ENOMEM; + w->func = fib6_dump_node; + cb->args[2] = (long)w; + } + + arg.skb = skb; + arg.cb = cb; + w->args = &arg; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { + if (e < s_e) + goto next; + res = fib6_dump_table(tb, skb, cb); + if (res != 0) + goto out; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + res = res < 0 ? res : skb->len; + if (res <= 0) + fib6_dump_end(cb); + return res; +} /* * Routing Table @@ -344,7 +610,7 @@ insert_above: */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nlmsghdr *nlh, struct netlink_skb_parms *req) + struct nl_info *info) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -399,7 +665,7 @@ out: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); + inet6_rt_notify(RTM_NEWROUTE, rt, info); rt6_stats.fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { @@ -429,10 +695,9 @@ void fib6_force_start_gc(void) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) { - struct fib6_node *fn; + struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), @@ -441,6 +706,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (fn == NULL) goto out; + pn = fn; + #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen) { struct fib6_node *sn; @@ -486,10 +753,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, /* Now link new subtree to main tree */ sfn->parent = fn; fn->subtree = sfn; - if (fn->leaf == NULL) { - fn->leaf = rt; - atomic_inc(&rt->rt6i_ref); - } } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, @@ -499,21 +762,42 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, goto st_failure; } + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } fn = sn; } #endif - err = fib6_add_rt2node(fn, rt, nlh, req); + err = fib6_add_rt2node(fn, rt, info); if (err == 0) { fib6_start_gc(rt); if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(fn, rt); + fib6_prune_clones(pn, rt); } out: - if (err) + if (err) { +#ifdef CONFIG_IPV6_SUBTREES + /* + * If fib6_add_1 has cleared the old leaf pointer in the + * super-tree leaf node we have to find a new one for it. + */ + if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn->leaf = fib6_find_prefix(pn); +#if RT6_DEBUG >= 2 + if (!pn->leaf) { + BUG_TRAP(pn->leaf != NULL); + pn->leaf = &ip6_null_entry; + } +#endif + atomic_inc(&pn->leaf->rt6i_ref); + } +#endif dst_free(&rt->u.dst); + } return err; #ifdef CONFIG_IPV6_SUBTREES @@ -544,6 +828,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node *fn; int dir; + if (unlikely(args->offset == 0)) + return NULL; + /* * Descend on a tree */ @@ -565,33 +852,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, break; } - while ((fn->fn_flags & RTN_ROOT) == 0) { -#ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) { - struct fib6_node *st; - struct lookup_args *narg; - - narg = args + 1; - - if (narg->addr) { - st = fib6_lookup_1(fn->subtree, narg); - - if (st && !(st->fn_flags & RTN_ROOT)) - return st; - } - } -#endif - - if (fn->fn_flags & RTN_RTINFO) { + while(fn) { + if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; key = (struct rt6key *) ((u8 *) fn->leaf + args->offset); - if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) - return fn; + if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { +#ifdef CONFIG_IPV6_SUBTREES + if (fn->subtree) + fn = fib6_lookup_1(fn->subtree, args + 1); +#endif + if (!fn || fn->fn_flags & RTN_RTINFO) + return fn; + } } + if (fn->fn_flags & RTN_ROOT) + break; + fn = fn->parent; } @@ -601,18 +881,24 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr) { - struct lookup_args args[2]; struct fib6_node *fn; - - args[0].offset = offsetof(struct rt6_info, rt6i_dst); - args[0].addr = daddr; - + struct lookup_args args[] = { + { + .offset = offsetof(struct rt6_info, rt6i_dst), + .addr = daddr, + }, #ifdef CONFIG_IPV6_SUBTREES - args[1].offset = offsetof(struct rt6_info, rt6i_src); - args[1].addr = saddr; + { + .offset = offsetof(struct rt6_info, rt6i_src), + .addr = saddr, + }, #endif + { + .offset = 0, /* sentinel */ + } + }; - fn = fib6_lookup_1(root, args); + fn = fib6_lookup_1(root, daddr ? args : args + 1); if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) fn = root; @@ -668,10 +954,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { BUG_TRAP(saddr!=NULL); - if (fn == NULL) - fn = fn->subtree; - if (fn) - fn = fib6_locate_1(fn, saddr, src_len, + if (fn && fn->subtree) + fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); } #endif @@ -700,7 +984,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) if(fn->right) return fn->right->leaf; - fn = SUBTREE(fn); + fn = FIB6_SUBTREE(fn); } return NULL; } @@ -731,7 +1015,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) if (fn->right) child = fn->right, children |= 1; if (fn->left) child = fn->left, children |= 2; - if (children == 3 || SUBTREE(fn) + if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ || (children && fn->fn_flags&RTN_ROOT) @@ -750,9 +1034,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); - SUBTREE(pn) = NULL; + FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); @@ -800,7 +1084,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) + if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -810,7 +1094,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) } static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) + struct nl_info *info) { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; @@ -866,11 +1150,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (atomic_read(&rt->rt6i_ref) != 1) BUG(); } - inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); + inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); } -int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_del(struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -886,8 +1170,18 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne BUG_TRAP(fn->fn_flags&RTN_RTINFO); - if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(fn, rt); + if (!(rt->rt6i_flags&RTF_CACHE)) { + struct fib6_node *pn = fn; +#ifdef CONFIG_IPV6_SUBTREES + /* clones of this route might be in another subtree */ + if (rt->rt6i_src.plen) { + while (!(pn->fn_flags&RTN_ROOT)) + pn = pn->parent; + pn = pn->parent; + } +#endif + fib6_prune_clones(pn, rt); + } /* * Walk the leaf entries looking for ourself @@ -895,7 +1189,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { if (*rtp == rt) { - fib6_del_route(fn, rtp, nlh, _rtattr, req); + fib6_del_route(fn, rtp, info); return 0; } } @@ -926,7 +1220,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne * <0 -> walk is terminated by an error. */ -int fib6_walk_continue(struct fib6_walker_t *w) +static int fib6_walk_continue(struct fib6_walker_t *w) { struct fib6_node *fn, *pn; @@ -943,8 +1237,8 @@ int fib6_walk_continue(struct fib6_walker_t *w) switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: - if (SUBTREE(fn)) { - w->node = SUBTREE(fn); + if (FIB6_SUBTREE(fn)) { + w->node = FIB6_SUBTREE(fn); continue; } w->state = FWS_L; @@ -978,7 +1272,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) pn = fn->parent; w->node = pn; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); w->state = FWS_L; continue; @@ -1000,7 +1294,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) } } -int fib6_walk(struct fib6_walker_t *w) +static int fib6_walk(struct fib6_walker_t *w) { int res; @@ -1024,7 +1318,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL, NULL, NULL); + res = fib6_del(rt, NULL); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); @@ -1050,9 +1344,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. */ -void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) +static void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) { struct fib6_cleaner_t c; @@ -1065,6 +1359,25 @@ void fib6_clean_tree(struct fib6_node *root, fib6_walk(&c.w); } +void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) +{ + struct fib6_table *table; + struct hlist_node *node; + unsigned int h; + + rcu_read_lock(); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], + tb6_hlist) { + write_lock_bh(&table->tb6_lock); + fib6_clean_tree(&table->tb6_root, func, prune, arg); + write_unlock_bh(&table->tb6_lock); + } + } + rcu_read_unlock(); +} + static int fib6_prune_clone(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & RTF_CACHE) { @@ -1143,11 +1456,8 @@ void fib6_run_gc(unsigned long dummy) } gc_args.more = 0; - - write_lock_bh(&rt6_lock); ndisc_dst_gc(&gc_args.more); - fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_age, 0, NULL); if (gc_args.more) mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); @@ -1162,10 +1472,10 @@ void __init fib6_init(void) { fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!fib6_node_kmem) - panic("cannot create fib6_nodes cache"); + + fib6_tables_init(); } void fib6_gc_cleanup(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f9ca63912fbf..1d672b0547f2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -10,7 +10,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aceee252503d..6b8e6d76a58b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -71,6 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto out; } + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* * Store incoming device index. When the packet will * be queued, we cannot refer to skb->dev anymore. @@ -84,14 +86,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt */ IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex; - if (skb->len < sizeof(struct ipv6hdr)) + if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - goto drop; - } - hdr = skb->nh.ipv6h; if (hdr->version != 6) @@ -114,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(skb) < 0) { + if (ipv6_parse_hopopts(&skb) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index abb94de33768..66716911962e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,7 +28,6 @@ * for datagram xmit */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -148,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) || + if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else @@ -230,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = sk->sk_priority; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || ipfragok) { + if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -309,6 +308,56 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) return 0; } +static int ip6_forward_proxy_check(struct sk_buff *skb) +{ + struct ipv6hdr *hdr = skb->nh.ipv6h; + u8 nexthdr = hdr->nexthdr; + int offset; + + if (ipv6_ext_hdr(nexthdr)) { + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); + if (offset < 0) + return 0; + } else + offset = sizeof(struct ipv6hdr); + + if (nexthdr == IPPROTO_ICMPV6) { + struct icmp6hdr *icmp6; + + if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) + return 0; + + icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset); + + switch (icmp6->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + /* For reaction involving unicast neighbor discovery + * message destined to the proxied address, pass it to + * input function. + */ + return 1; + default: + break; + } + } + + /* + * The proxying router can't forward traffic sent to a link-local + * address, so signal the sender and discard the packet. This + * behavior is clarified by the MIPv6 specification. + */ + if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { + dst_link_failure(skb); + return -1; + } + + return 0; +} + static inline int ip6_forward_finish(struct sk_buff *skb) { return dst_output(skb); @@ -357,11 +406,24 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; } + /* XXX: idev->cnf.proxy_ndp? */ + if (ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + int proxied = ip6_forward_proxy_check(skb); + if (proxied > 0) + return ip6_input(skb); + else if (proxied < 0) { + IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (!xfrm6_route_forward(skb)) { IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); goto drop; @@ -475,17 +537,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) switch (**nexthdr) { case NEXTHDR_HOP: + break; case NEXTHDR_ROUTING: + found_rhdr = 1; + break; case NEXTHDR_DEST: - if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; - if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); +#ifdef CONFIG_IPV6_MIP6 + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; +#endif + if (found_rhdr) + return offset; break; default : return offset; } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); } return offset; @@ -596,6 +666,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); + if(!err) + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -707,12 +780,11 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -724,48 +796,59 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static inline int ip6_rt_check(struct rt6key *rt_key, + struct in6_addr *fl_addr, + struct in6_addr *addr_cache) { - int err = 0; + return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); +} - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - * case is not very simple. Take into account, - * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) - * - * 1. If route was host route, check that - * cached destination is current. - * If it is network route, we still may - * check its validity using saved pointer - * to the last used address: daddr_cache. - * We do not want to save whole address now, - * (because main consumer of this service - * is tcp, which has not this problem), - * so that the last trick works only on connected - * sockets. - * 2. oif also should be the same. - */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, - &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, - np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } +static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; + + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. ip6_rt_check(): If route was host route, + * check that cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. + */ + if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +#ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +#endif + (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -774,7 +857,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } @@ -787,8 +869,48 @@ out_err_release: return err; } +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} EXPORT_SYMBOL_GPL(ip6_dst_lookup); +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -822,7 +944,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -835,7 +957,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* specify the length of each IP datagram fragment*/ skb_shinfo(skb)->gso_size = mtu - fragheaderlen - sizeof(struct frag_hdr); - skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; ipv6_select_ident(skb, &fhdr); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); @@ -919,7 +1041,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { @@ -1051,7 +1173,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a995796b5a57..84d7ebdb9d21 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -19,7 +19,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -568,10 +567,9 @@ static inline struct ipv6_txoptions *create_tel(__u8 encap_limit) int opt_len = sizeof(*opt) + 8; - if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) { + if (!(opt = kzalloc(opt_len, GFP_ATOMIC))) { return NULL; } - memset(opt, 0, opt_len); opt->tot_len = opt_len; opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1); opt->opt_nflen = 8; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index f28cd37feed3..a2860e35efd7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -30,7 +30,6 @@ * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -54,7 +53,7 @@ struct ipcomp6_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -71,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; if (skb_linearize_cow(skb)) @@ -110,7 +109,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out_put_cpu; } - skb_put(skb, dlen - plen); + skb->truesize += dlen - plen; + __skb_put(skb, dlen - plen); memcpy(skb->data, scratch, dlen); err = ipch->nexthdr; @@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; hdr_len = skb->h.raw - skb->data; @@ -178,7 +178,7 @@ out_ok: static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { - u32 spi; + __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -234,7 +234,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) { int err = 0; struct xfrm_state *t = NULL; - u32 spi; + __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) @@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void) return scratches; } -static void ipcomp6_free_tfms(struct crypto_tfm **tfms) +static void ipcomp6_free_tfms(struct crypto_comp **tfms) { struct ipcomp6_tfms *pos; int cpu; @@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) { struct ipcomp6_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp6_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; @@ -416,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -428,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; @@ -460,6 +461,7 @@ static struct xfrm_type ipcomp6_type = .destructor = ipcomp6_destroy, .input = ipcomp6_input, .output = ipcomp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ipcomp6_protocol = diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4c20eeb3d568..de6b91981b30 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -58,9 +57,116 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; +static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, + int proto) +{ + struct inet6_protocol *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_protos[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = opth->hdrlen * 8 + 8; + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return ops; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_send_check)) { + skb->h.raw = skb->data; + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + + if (!(features & NETIF_F_HW_CSUM)) + features &= ~NETIF_F_SG; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_segment)) { + skb->h.raw = skb->data; + segs = ops->gso_segment(skb, features); + } + rcu_read_unlock(); + + if (unlikely(IS_ERR(segs))) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = skb->nh.ipv6h; + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + } + +out: + return segs; +} + static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, }; struct ip6_ra_chain *ip6_ra_chain; @@ -259,7 +365,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_TCLASS: - if (val < 0 || val > 0xff) + if (val < -1 || val > 0xff) goto e_inval; np->tclass = val; retv = 0; @@ -304,8 +410,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, /* routing header option needs extra check */ if (optname == IPV6_RTHDR && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; - if (rthdr->type) + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: +#endif + break; + default: goto sticky_done; + } + if ((rthdr->hdrlen & 1) || (rthdr->hdrlen >> 1) != rthdr->segments_left) goto sticky_done; @@ -844,6 +958,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_TCLASS: val = np->tclass; + if (val < 0) + val = 0; break; case IPV6_RECVTCLASS: diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 16482785bdfd..0e8e0676a033 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -1,5 +1,4 @@ -#include <linux/config.h> #include <linux/module.h> #include <net/protocol.h> #include <net/ipv6.h> @@ -15,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); EXPORT_SYMBOL(inet6_register_protosw); @@ -32,6 +30,8 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_input_addr); +EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6e871afbb2c7..3b114e3fa2f8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -28,7 +28,6 @@ * - MLDv2 support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> @@ -172,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IPV6_MLD_MAX_MSF 64 -int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF; +int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; /* * socket join on multicast group @@ -269,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk,mc_lst,idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -335,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk) if (dev) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk, mc_lst, idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c new file mode 100644 index 000000000000..99d116caecda --- /dev/null +++ b/net/ipv6/mip6.c @@ -0,0 +1,519 @@ +/* + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/time.h> +#include <linux/ipv6.h> +#include <linux/icmpv6.h> +#include <net/sock.h> +#include <net/ipv6.h> +#include <net/ip6_checksum.h> +#include <net/xfrm.h> +#include <net/mip6.h> + +static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) +{ + return x->coaddr; +} + +static inline unsigned int calc_padlen(unsigned int len, unsigned int n) +{ + return (n - len + 16) & 0x7; +} + +static inline void *mip6_padn(__u8 *data, __u8 padlen) +{ + if (!data) + return NULL; + if (padlen == 1) { + data[0] = MIP6_OPT_PAD_1; + } else if (padlen > 1) { + data[0] = MIP6_OPT_PAD_N; + data[1] = padlen - 2; + if (padlen > 2) + memset(data+2, 0, data[1]); + } + return data + padlen; +} + +static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +{ + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); +} + +static int mip6_mh_len(int type) +{ + int len = 0; + + switch (type) { + case IP6_MH_TYPE_BRR: + len = 0; + break; + case IP6_MH_TYPE_HOTI: + case IP6_MH_TYPE_COTI: + case IP6_MH_TYPE_BU: + case IP6_MH_TYPE_BACK: + len = 1; + break; + case IP6_MH_TYPE_HOT: + case IP6_MH_TYPE_COT: + case IP6_MH_TYPE_BERROR: + len = 2; + break; + } + return len; +} + +int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +{ + struct ip6_mh *mh; + int mhlen; + + if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || + !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) + return -1; + + mh = (struct ip6_mh *)skb->h.raw; + + if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); + return -1; + } + mhlen = (mh->ip6mh_hdrlen + 1) << 3; + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb->csum)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); + skb->ip_summed = CHECKSUM_NONE; + } + } + if (skb->ip_summed == CHECKSUM_NONE) { + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb_checksum(skb, 0, mhlen, 0))) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " + "[" NIP6_FMT " > " NIP6_FMT "]\n", + NIP6(skb->nh.ipv6h->saddr), + NIP6(skb->nh.ipv6h->daddr)); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (mh->ip6mh_proto != IPPROTO_NONE) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); + mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); + return -1; + } + + return 0; +} + +struct mip6_report_rate_limiter { + spinlock_t lock; + struct timeval stamp; + int iif; + struct in6_addr src; + struct in6_addr dst; +}; + +static struct mip6_report_rate_limiter mip6_report_rl = { + .lock = SPIN_LOCK_UNLOCKED +}; + +static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; + + if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return destopt->nexthdr; +} + +/* Destination Option Header is inserted. + * IP Header's src address is replaced with Home Address Option in + * Destination Option Header. + */ +static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct ipv6_destopt_hdr *dstopt; + struct ipv6_destopt_hao *hao; + u8 nexthdr; + int len; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_DSTOPTS; + + dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; + dstopt->nexthdr = nexthdr; + + hao = mip6_padn((char *)(dstopt + 1), + calc_padlen(sizeof(*dstopt), 6)); + + hao->type = IPV6_TLV_HAO; + hao->length = sizeof(*hao) - 2; + BUG_TRAP(hao->length == 16); + + len = ((char *)hao - (char *)dstopt) + sizeof(*hao); + + memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); + memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); + + BUG_TRAP(len == x->props.header_len); + dstopt->hdrlen = (x->props.header_len >> 3) - 1; + + return 0; +} + +static inline int mip6_report_rl_allow(struct timeval *stamp, + struct in6_addr *dst, + struct in6_addr *src, int iif) +{ + int allow = 0; + + spin_lock_bh(&mip6_report_rl.lock); + if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || + mip6_report_rl.stamp.tv_usec != stamp->tv_usec || + mip6_report_rl.iif != iif || + !ipv6_addr_equal(&mip6_report_rl.src, src) || + !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { + mip6_report_rl.stamp.tv_sec = stamp->tv_sec; + mip6_report_rl.stamp.tv_usec = stamp->tv_usec; + mip6_report_rl.iif = iif; + ipv6_addr_copy(&mip6_report_rl.src, src); + ipv6_addr_copy(&mip6_report_rl.dst, dst); + allow = 1; + } + spin_unlock_bh(&mip6_report_rl.lock); + return allow; +} + +static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) +{ + struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + struct ipv6_destopt_hao *hao = NULL; + struct xfrm_selector sel; + int offset; + struct timeval stamp; + int err = 0; + + if (unlikely(fl->proto == IPPROTO_MH && + fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + goto out; + + if (likely(opt->dsthao)) { + offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); + if (likely(offset >= 0)) + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset); + } + + skb_get_timestamp(skb, &stamp); + + if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr, + hao ? &hao->addr : &skb->nh.ipv6h->saddr, + opt->iif)) + goto out; + + memset(&sel, 0, sizeof(sel)); + memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr, + sizeof(sel.daddr)); + sel.prefixlen_d = 128; + memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, + sizeof(sel.saddr)); + sel.prefixlen_s = 128; + sel.family = AF_INET6; + sel.proto = fl->proto; + sel.dport = xfrm_flowi_dport(fl); + if (sel.dport) + sel.dport_mask = ~((__u16)0); + sel.sport = xfrm_flowi_sport(fl); + if (sel.sport) + sel.sport_mask = ~((__u16)0); + sel.ifindex = fl->oif; + + err = km_report(IPPROTO_DSTOPTS, &sel, + (hao ? (xfrm_address_t *)&hao->addr : NULL)); + + out: + return err; +} + +static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* + * HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. + */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_destopt_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct ipv6_destopt_hdr) + + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + + sizeof(struct ipv6_destopt_hao); + BUG_TRAP(x->props.header_len == 24); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for + * destination options header unlike IPsec protocols. + */ +static void mip6_destopt_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_destopt_type = +{ + .description = "MIP6DESTOPT", + .owner = THIS_MODULE, + .proto = IPPROTO_DSTOPTS, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_destopt_init_state, + .destructor = mip6_destopt_destroy, + .input = mip6_destopt_input, + .output = mip6_destopt_output, + .reject = mip6_destopt_reject, + .hdr_offset = mip6_destopt_offset, + .local_addr = mip6_xfrm_addr, +}; + +static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; + + if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return rt2->rt_hdr.nexthdr; +} + +/* Routing Header type 2 is inserted. + * IP Header's dst address is replaced with Routing Header's Home Address. + */ +static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct rt2_hdr *rt2; + u8 nexthdr; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_ROUTING; + + rt2 = (struct rt2_hdr *)skb->h.raw; + rt2->rt_hdr.nexthdr = nexthdr; + rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; + rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; + rt2->rt_hdr.segments_left = 1; + memset(&rt2->reserved, 0, sizeof(rt2->reserved)); + + BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + + memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); + memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); + + return 0; +} + +static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + return offset; + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_rthdr_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct rt2_hdr); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for routing + * header type 2 unlike IPsec protocols. + */ +static void mip6_rthdr_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_rthdr_type = +{ + .description = "MIP6RT", + .owner = THIS_MODULE, + .proto = IPPROTO_ROUTING, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_rthdr_init_state, + .destructor = mip6_rthdr_destroy, + .input = mip6_rthdr_input, + .output = mip6_rthdr_output, + .hdr_offset = mip6_rthdr_offset, + .remote_addr = mip6_xfrm_addr, +}; + +int __init mip6_init(void) +{ + printk(KERN_INFO "Mobile IPv6\n"); + + if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); + goto mip6_destopt_xfrm_fail; + } + if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); + goto mip6_rthdr_xfrm_fail; + } + return 0; + + mip6_rthdr_xfrm_fail: + xfrm_unregister_type(&mip6_destopt_type, AF_INET6); + mip6_destopt_xfrm_fail: + return -EAGAIN; +} + +void __exit mip6_fini(void) +{ + if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); + if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); +} diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index dfa20d3be9b6..0304b5fe8d6a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -48,7 +48,6 @@ #endif #include <linux/module.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -63,6 +62,7 @@ #include <linux/sysctl.h> #endif +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> @@ -412,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n) */ static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr) + struct in6_addr *saddr, struct in6_addr *daddr, + int oif) { memset(fl, 0, sizeof(*fl)); ipv6_addr_copy(&fl->fl6_src, saddr); @@ -420,6 +421,8 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + fl->oif = oif; + security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, @@ -451,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -492,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, msg->icmph.icmp6_unused = 0; msg->icmph.icmp6_router = router; msg->icmph.icmp6_solicited = solicited; - msg->icmph.icmp6_override = !!override; + msg->icmph.icmp6_override = override; /* Set the target address. */ ipv6_addr_copy(&msg->target, solicited_addr); @@ -541,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -616,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, int len; int err; - ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr); + ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output); if (!dst) @@ -730,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; + struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; + int is_router; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -816,7 +824,9 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { + (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && + (pneigh = pneigh_lookup(&nd_tbl, + &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -837,12 +847,14 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } + is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); + if (dad) { struct in6_addr maddr; ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &msg->target, - idev->cnf.forwarding, 0, (ifp != NULL), 1); + is_router, 0, (ifp != NULL), 1); goto out; } @@ -863,7 +875,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->hard_header) { ndisc_send_na(dev, neigh, saddr, &msg->target, - idev->cnf.forwarding, + is_router, 1, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); @@ -946,6 +958,20 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh->nud_state & NUD_FAILED) goto out; + /* + * Don't update the neighbor cache entry on a proxy NA from + * ourselves because either the proxied node is off link or it + * has already sent a NA to us. + */ + if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && + ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + /* XXX: idev->cnf.prixy_ndp */ + WARN_ON(skb->dst != NULL && + ((struct rt6_info *)skb->dst)->rt6i_idev); + goto out; + } + neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| @@ -960,7 +986,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct rt6_info *rt; rt = rt6_get_dflt_router(saddr, dev); if (rt) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); } out: @@ -1113,7 +1139,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt && lifetime == 0) { neigh_clone(neigh); - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } @@ -1345,7 +1371,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); if (neigh) { - rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, + rt6_redirect(dest, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr, neigh, lladdr, on_link); neigh_release(neigh); } @@ -1381,7 +1408,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr); + ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, + dev->ifindex); dst = ip6_route_output(NULL, &fl); if (dst == NULL) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 395a417ba955..580b1aba6722 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index eeeb57d4c9c5..ac1dfebde175 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -5,7 +5,7 @@ # Link order matters here. obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o -obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o +obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index b4b7d441af25..9510c24ca8d2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -56,15 +56,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); @@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } @@ -505,7 +505,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e72f89a7019..4ab368fa0b8f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -19,13 +19,13 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> +#include <linux/poison.h> #include <linux/icmpv6.h> #include <net/ipv6.h> #include <asm/uaccess.h> @@ -70,9 +70,6 @@ do { \ #define IP_NF_ASSERT(x) #endif - -#include <linux/netfilter_ipv4/listhelp.h> - #if 0 /* All the better to debug you with... */ #define static @@ -220,8 +217,7 @@ ip6t_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip6_tables: error: `%s'\n", (char *)targinfo); @@ -258,8 +254,7 @@ ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct xt_table *table, - void *userdata) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; @@ -349,8 +344,7 @@ ip6t_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom @@ -377,7 +371,7 @@ ip6t_do_table(struct sk_buff **pskb, } while (!hotdrop); #ifdef CONFIG_NETFILTER_DEBUG - ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac; + ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif read_unlock_bh(&table->lock); @@ -507,8 +501,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -561,7 +554,6 @@ check_match(struct ip6t_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -618,12 +610,10 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ip6t_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto cleanup_matches; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -695,8 +685,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1281,7 +1270,8 @@ int ip6t_register_table(struct xt_table *table, return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } @@ -1351,7 +1341,6 @@ icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_icmp *icmpinfo = matchinfo; @@ -1397,23 +1386,39 @@ static int __init ip6_tables_init(void) { int ret; - xt_proto_init(AF_INET6); + ret = xt_proto_init(AF_INET6); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ip6t_standard_target); - xt_register_target(&ip6t_error_target); - xt_register_match(&icmp6_matchstruct); + ret = xt_register_target(&ip6t_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ip6t_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp6_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - xt_proto_fini(AF_INET6); - return ret; - } + if (ret < 0) + goto err5; printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp6_matchstruct); +err4: + xt_unregister_target(&ip6t_error_target); +err3: + xt_unregister_target(&ip6t_standard_target); +err2: + xt_proto_fini(AF_INET6); +err1: + return ret; } static void __exit ip6_tables_fini(void) diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index b8eff8ee69b1..435750f664dd 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -22,11 +22,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; - u_int16_t diffs[2]; int new_hl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, break; } - if (new_hl != ip6h->hop_limit) { - diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + if (new_hl != ip6h->hop_limit) ip6h->hop_limit = new_hl; - diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); - } return IP6T_CONTINUE; } @@ -66,7 +62,6 @@ static int ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip6t_HL_info *info = targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 73c6300109d6..0cf537d30185 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -452,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index de1175c27f6d..311eae82feb3 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> @@ -97,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; + security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; @@ -180,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_reject_info *reject = targinfo; @@ -224,7 +223,6 @@ static int check(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; @@ -257,9 +255,7 @@ static struct ip6t_target ip6t_reject_reg = { static int __init ip6t_reject_init(void) { - if (ip6t_register_target(&ip6t_reject_reg)) - return -EINVAL; - return 0; + return ip6t_register_target(&ip6t_reject_reg); } static void __exit ip6t_reject_fini(void) diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2f7bb20c758b..ec1b1608156c 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -102,7 +102,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c deleted file mode 100644 index 9422413d0571..000000000000 --- a/net/ipv6/netfilter/ip6t_dst.c +++ /dev/null @@ -1,220 +0,0 @@ -/* Kernel module to match Hop-by-Hop and Destination parameters. */ - -/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ipv6.h> -#include <linux/types.h> -#include <net/checksum.h> -#include <net/ipv6.h> - -#include <asm/byteorder.h> - -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter_ipv6/ip6t_opts.h> - -#define HOPBYHOP 0 - -MODULE_LICENSE("GPL"); -#if HOPBYHOP -MODULE_DESCRIPTION("IPv6 HbH match"); -#else -MODULE_DESCRIPTION("IPv6 DST match"); -#endif -MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* - * (Type & 0xC0) >> 6 - * 0 -> ignorable - * 1 -> must drop the packet - * 2 -> send ICMP PARM PROB regardless and drop packet - * 3 -> Send ICMP if not a multicast address and drop packet - * (Type & 0x20) >> 5 - * 0 -> invariant - * 1 -> can change the routing - * (Type & 0x1F) Type - * 0 -> Pad1 (only 1 byte!) - * 1 -> PadN LENGTH info (total length = length + 2) - * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) - * 5 -> RTALERT 2 x x - */ - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct ipv6_opt_hdr _optsh, *oh; - const struct ip6t_opts *optinfo = matchinfo; - unsigned int temp; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int ret = 0; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; - unsigned int optlen; - -#if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) -#else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) -#endif - return 0; - - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - if (oh == NULL) { - *hotdrop = 1; - return 0; - } - - hdrlen = ipv6_optlen(oh); - if (skb->len - ptr < hdrlen) { - /* Packet smaller than it's length field */ - return 0; - } - - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); - - ret = (oh != NULL) && - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); - - ptr += 2; - hdrlen -= 2; - if (!(optinfo->flags & IP6T_OPTS_OPTS)) { - return ret; - } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - DEBUGP("Not strict - not implemented"); - } else { - DEBUGP("Strict "); - DEBUGP("#%d ", optinfo->optsnr); - for (temp = 0; temp < optinfo->optsnr; temp++) { - /* type field exists ? */ - if (hdrlen < 1) - break; - tp = skb_header_pointer(skb, ptr, sizeof(_opttype), - &_opttype); - if (tp == NULL) - break; - - /* Type check */ - if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { - DEBUGP("Tbad %02X %02X\n", - *tp, - (optinfo->opts[temp] & 0xFF00) >> 8); - return 0; - } else { - DEBUGP("Tok "); - } - /* Length check */ - if (*tp) { - u16 spec_len; - - /* length field exists ? */ - if (hdrlen < 2) - break; - lp = skb_header_pointer(skb, ptr + 1, - sizeof(_optlen), - &_optlen); - if (lp == NULL) - break; - spec_len = optinfo->opts[temp] & 0x00FF; - - if (spec_len != 0x00FF && spec_len != *lp) { - DEBUGP("Lbad %02X %04X\n", *lp, - spec_len); - return 0; - } - DEBUGP("Lok "); - optlen = *lp + 2; - } else { - DEBUGP("Pad1\n"); - optlen = 1; - } - - /* Step to the next */ - DEBUGP("len%04X \n", optlen); - - if ((ptr > skb->len - optlen || hdrlen < optlen) && - (temp < optinfo->optsnr - 1)) { - DEBUGP("new pointer is too large! \n"); - break; - } - ptr += optlen; - hdrlen -= optlen; - } - if (temp == optinfo->optsnr) - return ret; - else - return 0; - } - - return 0; -} - -/* Called when user tries to insert an entry of this type. */ -static int -checkentry(const char *tablename, - const void *info, - const struct xt_match *match, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) -{ - const struct ip6t_opts *optsinfo = matchinfo; - - if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); - return 0; - } - return 1; -} - -static struct ip6t_match opts_match = { -#if HOPBYHOP - .name = "hbh", -#else - .name = "dst", -#endif - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ip6t_dst_init(void) -{ - return ip6t_register_match(&opts_match); -} - -static void __exit ip6t_dst_fini(void) -{ - ip6t_unregister_match(&opts_match); -} - -module_init(ip6t_dst_init); -module_exit(ip6t_dst_fini); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 06768c84bd31..78d9c8b9e28a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -119,7 +119,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 374f1be85c0d..d32a205e3af2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -19,15 +19,10 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_opts.h> -#define HOPBYHOP 1 - MODULE_LICENSE("GPL"); -#if HOPBYHOP -MODULE_DESCRIPTION("IPv6 HbH match"); -#else -MODULE_DESCRIPTION("IPv6 DST match"); -#endif +MODULE_DESCRIPTION("IPv6 opts match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); +MODULE_ALIAS("ip6t_dst"); #if 0 #define DEBUGP printk @@ -71,11 +66,7 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; -#if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) -#else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) -#endif + if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0) return 0; oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); @@ -182,7 +173,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; @@ -194,26 +184,35 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match opts_match = { -#if HOPBYHOP - .name = "hbh", -#else - .name = "dst", -#endif - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, +static struct xt_match opts_match[] = { + { + .name = "hbh", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, + .data = NEXTHDR_HOP, + }, + { + .name = "dst", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, + .data = NEXTHDR_DEST, + }, }; static int __init ip6t_hbh_init(void) { - return ip6t_register_match(&opts_match); + return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); } static void __exit ip6t_hbh_fini(void) { - ip6t_unregister_match(&opts_match); + xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); } module_init(ip6t_hbh_init); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 9375eeb1369f..3093c398002f 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 5d047990cd44..4eb9bbc4ebc3 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -57,7 +57,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index fbb0184a41d8..bcb2e168a5bc 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -197,7 +197,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 60976c0c58e8..2fc07c74decf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 03a13eab1dae..386ea260e767 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -174,18 +174,14 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) || (*pskb)->nfmark != nfmark - || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { - - /* something which could affect routing has changed */ - - DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); - } + || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) + return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; return ret; } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 61a7c58e99f8..b4154da575c0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2a71c3b669f1..e5e53fff9e38 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -20,7 +20,6 @@ * structures. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ipv6.h> #include <linux/in6.h> @@ -336,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { /* From nf_conntrack_proto_icmpv6.c */ extern unsigned int nf_ct_icmpv6_timeout; -/* From nf_conntrack_frag6.c */ +/* From nf_conntrack_reasm.c */ extern unsigned int nf_ct_frag6_timeout; extern unsigned int nf_ct_frag6_low_thresh; extern unsigned int nf_ct_frag6_high_thresh; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ef18a7b7014b..34d447208ffd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -33,7 +33,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> -unsigned long nf_ct_icmpv6_timeout = 30*HZ; +unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c32a029e43f0..bf93c1ea6be9 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -55,9 +54,9 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh = 256*1024; -unsigned int nf_ct_frag6_low_thresh = 192*1024; -unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; +unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; +unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; +unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; struct nf_ct_frag6_skb_cb { @@ -409,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, return -1; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr + 1) - skb->nh.raw, @@ -641,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &nf_ct_frag6_mem); @@ -653,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->nh.ipv6h->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); fq->fragments = NULL; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 779ddf77f4d4..efee7a6301a8 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -17,7 +17,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/socket.h> #include <linux/net.h> diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fa1ce0ae123e..d09329ca3267 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,6 +50,9 @@ #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> +#ifdef CONFIG_IPV6_MIP6 +#include <net/mip6.h> +#endif #include <net/rawv6.h> #include <net/xfrm.h> @@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { + int filtered; + delivered = 1; - if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { + switch (nexthdr) { + case IPPROTO_ICMPV6: + filtered = icmpv6_filter(sk, skb); + break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + /* XXX: To validate MH only once for each packet, + * this is placed here. It should be after checking + * xfrm policy, however it doesn't. The checking xfrm + * policy is placed in rawv6_rcv() because it is + * required for each socket. + */ + filtered = mip6_mh_filter(sk, skb); + break; +#endif + default: + filtered = 0; + break; + } + + if (filtered < 0) + break; + if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ @@ -334,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { skb_postpull_rcsum(skb, skb->nh.raw, skb->h.raw - skb->nh.raw); if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, @@ -411,6 +438,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, /* Copy the address. */ if (sin6) { sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr); sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; @@ -581,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; +#ifdef CONFIG_IPV6_MIP6 + u8 len = 0; +#endif int probed = 0; int i; @@ -612,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) probed = 1; } break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (iov->iov_base && iov->iov_len < 1) + break; + /* check if type field is readable or not. */ + if (iov->iov_len > 2 - len) { + u8 __user *p = iov->iov_base; + get_user(fl->fl_mh_type, &p[2 - len]); + probed = 1; + } else + len += iov->iov_len; + + break; +#endif default: probed = 1; break; @@ -758,6 +803,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -780,7 +826,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (tclass < 0) { - tclass = np->cork.tclass; + tclass = np->tclass; if (tclass < 0) tclass = 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index eef985e010ea..f39bbedd1327 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -28,7 +28,6 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -54,10 +53,10 @@ #include <net/ndisc.h> #include <net/addrconf.h> -int sysctl_ip6frag_high_thresh = 256*1024; -int sysctl_ip6frag_low_thresh = 192*1024; +int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; +int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; -int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; +int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; struct ip6frag_skb_cb { @@ -153,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, } static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval = 10 * 60 * HZ; +int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; static void ip6_frag_secret_rebuild(unsigned long dummy) { @@ -434,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); @@ -648,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip6_frag_mem); @@ -663,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, *skb_in = head; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8a777932786d..d6b4b4f48d18 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -22,10 +22,11 @@ * routers in REACHABLE, STALE, DELAY or PROBE states). * - always select the same router if it is (probably) * reachable. otherwise, round-robin the list. + * Ville Nuorvala + * Fixed routing subtrees. */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/times.h> @@ -36,7 +37,6 @@ #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/init.h> -#include <linux/netlink.h> #include <linux/if_arp.h> #ifdef CONFIG_PROC_FS @@ -54,6 +54,8 @@ #include <linux/rtnetlink.h> #include <net/dst.h> #include <net/xfrm.h> +#include <net/netevent.h> +#include <net/netlink.h> #include <asm/uaccess.h> @@ -74,9 +76,6 @@ #define CLONE_OFFLINK_ROUTE 0 -#define RT6_SELECT_F_IFACE 0x1 -#define RT6_SELECT_F_REACHABLE 0x2 - static int ip6_rt_max_size = 4096; static int ip6_rt_gc_min_interval = HZ / 2; static int ip6_rt_gc_timeout = 60*HZ; @@ -140,15 +139,49 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct fib6_node ip6_routing_table = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, -}; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES -/* Protects all the ip6 fib */ +struct rt6_info ip6_prohibit_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_prohibit_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; -DEFINE_RWLOCK(rt6_lock); +struct rt6_info ip6_blk_hole_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_blk_hole_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; +#endif /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) @@ -188,8 +221,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) time_after(jiffies, rt->rt6i_expires)); } +static inline int rt6_need_strict(struct in6_addr *daddr) +{ + return (ipv6_addr_type(daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); +} + /* - * Route lookup. Any rt6_lock is implied. + * Route lookup. Any table->tb6_lock is implied. */ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, @@ -298,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int m, n; m = rt6_check_dev(rt, oif); - if (!m && (strict & RT6_SELECT_F_IFACE)) + if (!m && (strict & RT6_LOOKUP_F_IFACE)) return -1; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; @@ -306,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, n = rt6_check_neigh(rt); if (n > 1) m |= 16; - else if (!n && strict & RT6_SELECT_F_REACHABLE) + else if (!n && strict & RT6_LOOKUP_F_REACHABLE) return -1; return m; } @@ -346,10 +385,10 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, } if (!match && - (strict & RT6_SELECT_F_REACHABLE) && + (strict & RT6_LOOKUP_F_REACHABLE) && last && last != rt0) { /* no entries matched; do round-robin */ - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); spin_lock(&lock); *head = rt0->u.next; rt0->u.next = last->u.next; @@ -417,7 +456,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); if (rt && !lifetime) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } @@ -441,44 +480,95 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +#define BACKTRACK(saddr) \ +do { \ + if (rt == &ip6_null_entry) { \ + struct fib6_node *pn; \ + while (fn) { \ + if (fn->fn_flags & RTN_TL_ROOT) \ + goto out; \ + pn = fn->parent; \ + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ + fn = fib6_lookup(pn->subtree, NULL, saddr); \ + else \ + fn = pn; \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ + } \ + } \ +} while(0) + +static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt; - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, daddr, saddr); - rt = rt6_device_match(fn->leaf, oif, strict); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); +restart: + rt = fn->leaf; + rt = rt6_device_match(rt, fl->oif, flags); + BACKTRACK(&fl->fl6_src); +out: dst_hold(&rt->u.dst); - rt->u.dst.__use++; - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); rt->u.dst.lastuse = jiffies; - if (rt->u.dst.error == 0) - return rt; - dst_release(&rt->u.dst); + rt->u.dst.__use++; + + return rt; + +} + +struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, + int oif, int strict) +{ + struct flowi fl = { + .oif = oif, + .nl_u = { + .ip6_u = { + .daddr = *daddr, + /* TODO: saddr */ + }, + }, + }; + struct dst_entry *dst; + int flags = strict ? RT6_LOOKUP_F_IFACE : 0; + + dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + if (dst->error == 0) + return (struct rt6_info *) dst; + + dst_release(dst); + return NULL; } -/* ip6_ins_rt is called with FREE rt6_lock. +/* ip6_ins_rt is called with FREE table->tb6_lock. It takes new route entry, the addition fails by any reason the route is freed. In any case, if caller does not hold it, it may be destroyed. */ -int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); - err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); - write_unlock_bh(&rt6_lock); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + err = fib6_add(&table->tb6_root, rt, info); + write_unlock_bh(&table->tb6_lock); return err; } +int ip6_ins_rt(struct rt6_info *rt) +{ + return __ip6_ins_rt(rt, NULL); +} + static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -532,51 +622,39 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -#define BACKTRACK() \ -if (rt == &ip6_null_entry) { \ - while ((fn = fn->parent) != NULL) { \ - if (fn->fn_flags & RTN_ROOT) { \ - goto out; \ - } \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ -} - - -void ip6_route_input(struct sk_buff *skb) +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict; + int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: - read_lock_bh(&rt6_lock); + read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); - BACKTRACK(); + rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); + BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); + nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); else { #if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); + nrt = rt6_alloc_clone(rt, &fl->fl6_dst); #else goto out2; #endif @@ -587,7 +665,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -596,7 +674,7 @@ restart: goto out2; /* - * Race condition! In the gap, when rt6_lock was + * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -608,40 +686,63 @@ out: goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - skb->dst = (struct dst_entry *) rt; - return; + + return rt; } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +void ip6_route_input(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct flowi fl = { + .iif = skb->dev->ifindex, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = iph->saddr, +#ifdef CONFIG_IPV6_ROUTE_FWMARK + .fwmark = skb->nfmark, +#endif + .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, + }, + }, + .proto = iph->nexthdr, + }; + int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0; + + skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); +} + +static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict; + int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: - read_lock_bh(&rt6_lock); + read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); - BACKTRACK(); + BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); @@ -658,7 +759,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, NULL); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -667,7 +768,7 @@ restart: goto out2; /* - * Race condition! In the gap, when rt6_lock was + * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -679,11 +780,21 @@ out: goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - return &rt->u.dst; + return rt; +} + +struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +{ + int flags = 0; + + if (rt6_need_strict(&fl->fl6_dst)) + flags |= RT6_LOOKUP_F_IFACE; + + return fib6_rule_lookup(fl, flags, ip6_pol_route_output); } @@ -709,7 +820,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); else dst_release(dst); } @@ -743,11 +854,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; } dst->metrics[RTAX_MTU-1] = mtu; + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } -/* Protected by rt6_lock. */ -static struct dst_entry *ndisc_dst_gc_list; static int ipv6_get_mtu(struct net_device *dev); static inline unsigned int ipv6_advmss(unsigned int mtu) @@ -768,6 +878,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } +static struct dst_entry *ndisc_dst_gc_list; +static DEFINE_SPINLOCK(ndisc_lock); + struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, struct in6_addr *addr, @@ -808,10 +921,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - write_lock_bh(&rt6_lock); + spin_lock_bh(&ndisc_lock); rt->u.dst.next = ndisc_dst_gc_list; ndisc_dst_gc_list = &rt->u.dst; - write_unlock_bh(&rt6_lock); + spin_unlock_bh(&ndisc_lock); fib6_force_start_gc(); @@ -825,8 +938,11 @@ int ndisc_dst_gc(int *more) int freed; next = NULL; + freed = 0; + + spin_lock_bh(&ndisc_lock); pprev = &ndisc_dst_gc_list; - freed = 0; + while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; @@ -838,6 +954,8 @@ int ndisc_dst_gc(int *more) } } + spin_unlock_bh(&ndisc_lock); + return freed; } @@ -898,28 +1016,24 @@ int ipv6_get_hoplimit(struct net_device *dev) * */ -int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +int ip6_route_add(struct fib6_config *cfg) { int err; - struct rtmsg *r; - struct rtattr **rta; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; + struct fib6_table *table; int addr_type; - rta = (struct rtattr **) _rtattr; - - if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) return -EINVAL; #ifndef CONFIG_IPV6_SUBTREES - if (rtmsg->rtmsg_src_len) + if (cfg->fc_src_len) return -EINVAL; #endif - if (rtmsg->rtmsg_ifindex) { + if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(rtmsg->rtmsg_ifindex); + dev = dev_get_by_index(cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -927,8 +1041,14 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto out; } - if (rtmsg->rtmsg_metric == 0) - rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + if (cfg->fc_metric == 0) + cfg->fc_metric = IP6_RT_PRIO_USER; + + table = fib6_new_table(cfg->fc_table); + if (table == NULL) { + err = -ENOBUFS; + goto out; + } rt = ip6_dst_alloc(); @@ -938,14 +1058,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info); - if (nlh && (r = NLMSG_DATA(nlh))) { - rt->rt6i_protocol = r->rtm_protocol; - } else { - rt->rt6i_protocol = RTPROT_BOOT; - } + rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); - addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); + if (cfg->fc_protocol == RTPROT_UNSPEC) + cfg->fc_protocol = RTPROT_BOOT; + rt->rt6i_protocol = cfg->fc_protocol; + + addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) rt->u.dst.input = ip6_mc_input; @@ -954,24 +1073,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, rt->u.dst.output = ip6_output; - ipv6_addr_prefix(&rt->rt6i_dst.addr, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); - rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; + ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) rt->u.dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); - rt->rt6i_src.plen = rtmsg->rtmsg_src_len; + ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->rt6i_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = rtmsg->rtmsg_metric; + rt->rt6i_metric = cfg->fc_metric; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes */ - if ((rtmsg->rtmsg_flags&RTF_REJECT) || + if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != &loopback_dev) { @@ -994,12 +1111,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto install_route; } - if (rtmsg->rtmsg_flags & RTF_GATEWAY) { + if (cfg->fc_flags & RTF_GATEWAY) { struct in6_addr *gw_addr; int gwa_type; - gw_addr = &rtmsg->rtmsg_gateway; - ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); + gw_addr = &cfg->fc_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -1016,7 +1133,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); + grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1048,7 +1165,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (dev == NULL) goto out; - if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { + if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); if (IS_ERR(rt->rt6i_nexthop)) { err = PTR_ERR(rt->rt6i_nexthop); @@ -1057,24 +1174,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } } - rt->rt6i_flags = rtmsg->rtmsg_flags; + rt->rt6i_flags = cfg->fc_flags; install_route: - if (rta && rta[RTA_METRICS-1]) { - int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); - struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) { + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) { err = -EINVAL; goto out; } - rt->u.dst.metrics[flavor-1] = - *(u32 *)RTA_DATA(attr); + + rt->u.dst.metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } @@ -1086,7 +1203,8 @@ install_route: rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; - return ip6_ins_rt(rt, nlh, _rtattr, req); + rt->rt6i_table = table; + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: if (dev) @@ -1098,51 +1216,65 @@ out: return err; } -int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); + if (rt == &ip6_null_entry) + return -ENOENT; - err = fib6_del(rt, nlh, _rtattr, req); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + + err = fib6_del(rt, info); dst_release(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return err; } -static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int ip6_del_rt(struct rt6_info *rt) { + return __ip6_del_rt(rt, NULL); +} + +static int ip6_route_del(struct fib6_config *cfg) +{ + struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - read_lock_bh(&rt6_lock); + table = fib6_get_table(cfg->fc_table); + if (table == NULL) + return err; - fn = fib6_locate(&ip6_routing_table, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); + read_lock_bh(&table->tb6_lock); + + fn = fib6_locate(&table->tb6_root, + &cfg->fc_dst, cfg->fc_dst_len, + &cfg->fc_src, cfg->fc_src_len); if (fn) { for (rt = fn->leaf; rt; rt = rt->u.next) { - if (rtmsg->rtmsg_ifindex && + if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || - rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) + rt->rt6i_dev->ifindex != cfg->fc_ifindex)) continue; - if (rtmsg->rtmsg_flags&RTF_GATEWAY && - !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) continue; - if (rtmsg->rtmsg_metric && - rtmsg->rtmsg_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); - return ip6_del_rt(rt, nlh, _rtattr, req); + return __ip6_del_rt(rt, &cfg->fc_nlinfo); } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); return err; } @@ -1150,11 +1282,17 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r /* * Handle redirects */ -void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) +struct ip6rd_flowi { + struct flowi fl; + struct in6_addr gateway; +}; + +static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, + struct flowi *fl, + int flags) { - struct rt6_info *rt, *nrt = NULL; - int strict; + struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; + struct rt6_info *rt; struct fib6_node *fn; /* @@ -1167,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, * is a bit fuzzy and one might need to check all possible * routes. */ - strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, dest, NULL); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* @@ -1185,29 +1322,60 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (neigh->dev != rt->rt6i_dev) + if (fl->oif != rt->rt6i_dev->ifindex) continue; - if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) + if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; break; } - if (rt) - dst_hold(&rt->u.dst); - else if (strict) { - while ((fn = fn->parent) != NULL) { - if (fn->fn_flags & RTN_ROOT) - break; - if (fn->fn_flags & RTN_RTINFO) - goto restart; - } - } - read_unlock_bh(&rt6_lock); - if (!rt) { + if (!rt) + rt = &ip6_null_entry; + BACKTRACK(&fl->fl6_src); +out: + dst_hold(&rt->u.dst); + + read_unlock_bh(&table->tb6_lock); + + return rt; +}; + +static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, + struct in6_addr *src, + struct in6_addr *gateway, + struct net_device *dev) +{ + struct ip6rd_flowi rdfl = { + .fl = { + .oif = dev->ifindex, + .nl_u = { + .ip6_u = { + .daddr = *dest, + .saddr = *src, + }, + }, + }, + .gateway = *gateway, + }; + int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0; + + return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); +} + +void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, + struct in6_addr *saddr, + struct neighbour *neigh, u8 *lladdr, int on_link) +{ + struct rt6_info *rt, *nrt = NULL; + struct netevent_redirect netevent; + + rt = ip6_route_redirect(dest, src, saddr, neigh->dev); + + if (rt == &ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); - return; + goto out; } /* @@ -1250,11 +1418,15 @@ restart: nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); - if (ip6_ins_rt(nrt, NULL, NULL, NULL)) + if (ip6_ins_rt(nrt)) goto out; + netevent.old = &rt->u.dst; + netevent.new = &nrt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); + if (rt->rt6i_flags&RTF_CACHE) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); return; } @@ -1336,7 +1508,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - ip6_ins_rt(nrt, NULL, NULL, NULL); + ip6_ins_rt(nrt); } out: dst_release(&rt->u.dst); @@ -1372,6 +1544,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif + rt->rt6i_table = ort->rt6i_table; } return rt; } @@ -1382,9 +1555,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle { struct fib6_node *fn; struct rt6_info *rt = NULL; + struct fib6_table *table; - write_lock_bh(&rt6_lock); - fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); + table = fib6_get_table(RT6_TABLE_INFO); + if (table == NULL) + return NULL; + + write_lock_bh(&table->tb6_lock); + fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1399,7 +1577,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle break; } out: - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1407,21 +1585,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct in6_addr *gwaddr, int ifindex, unsigned pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_INFO, + .fc_metric = 1024, + .fc_ifindex = ifindex, + .fc_dst_len = prefixlen, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | + RTF_UP | RTF_PREF(pref), + }; + + ipv6_addr_copy(&cfg.fc_dst, prefix); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - memset(&rtmsg, 0, sizeof(rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); - rtmsg.rtmsg_dst_len = prefixlen; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); /* We should treat it as a default route if prefix length is 0. */ if (!prefixlen) - rtmsg.rtmsg_flags |= RTF_DEFAULT; - rtmsg.rtmsg_ifindex = ifindex; + cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&cfg); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1430,12 +1610,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; - struct fib6_node *fn; + struct fib6_table *table; - fn = &ip6_routing_table; + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return NULL; - write_lock_bh(&rt6_lock); - for (rt = fn->leaf; rt; rt=rt->u.next) { + write_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -1443,7 +1625,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d } if (rt) dst_hold(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1451,43 +1633,65 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_DFLT, + .fc_metric = 1024, + .fc_ifindex = dev->ifindex, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | + RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + }; - memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | - RTF_PREF(pref); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - rtmsg.rtmsg_ifindex = dev->ifindex; + ip6_route_add(&cfg); - ip6_route_add(&rtmsg, NULL, NULL, NULL); return rt6_get_dflt_router(gwaddr, dev); } void rt6_purge_dflt_routers(void) { struct rt6_info *rt; + struct fib6_table *table; + + /* NOTE: Keep consistent with rt6_get_dflt_router */ + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return; restart: - read_lock_bh(&rt6_lock); - for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { + read_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); - - read_unlock_bh(&rt6_lock); - - ip6_del_rt(rt, NULL, NULL, NULL); - + read_unlock_bh(&table->tb6_lock); + ip6_del_rt(rt); goto restart; } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); +} + +static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, + struct fib6_config *cfg) +{ + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_ifindex = rtmsg->rtmsg_ifindex; + cfg->fc_metric = rtmsg->rtmsg_metric; + cfg->fc_expires = rtmsg->rtmsg_info; + cfg->fc_dst_len = rtmsg->rtmsg_dst_len; + cfg->fc_src_len = rtmsg->rtmsg_src_len; + cfg->fc_flags = rtmsg->rtmsg_flags; + + ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); + ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); + ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); } int ipv6_route_ioctl(unsigned int cmd, void __user *arg) { + struct fib6_config cfg; struct in6_rtmsg rtmsg; int err; @@ -1500,14 +1704,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) sizeof(struct in6_rtmsg)); if (err) return -EFAULT; - + + rtmsg_to_fib6_config(&rtmsg, &cfg); + rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL, NULL, NULL); + err = ip6_route_add(&cfg); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL, NULL, NULL); + err = ip6_route_del(&cfg); break; default: err = -EINVAL; @@ -1526,6 +1732,10 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) static int ip6_pkt_discard(struct sk_buff *skb) { + int type = ipv6_addr_type(&skb->nh.ipv6h->daddr); + if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) + IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); kfree_skb(skb); @@ -1577,6 +1787,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; + rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); @@ -1595,9 +1806,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) void rt6_ifdown(struct net_device *dev) { - write_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_ifdown, 0, dev); } struct rt6_mtu_change_arg @@ -1647,80 +1856,114 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) void rt6_mtu_change(struct net_device *dev, unsigned mtu) { - struct rt6_mtu_change_arg arg; + struct rt6_mtu_change_arg arg = { + .dev = dev, + .mtu = mtu, + }; - arg.dev = dev; - arg.mtu = mtu; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_mtu_change_route, 0, &arg); } -static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, - struct in6_rtmsg *rtmsg) +static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { + [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, +}; + +static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib6_config *cfg) { - memset(rtmsg, 0, sizeof(*rtmsg)); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int err; - rtmsg->rtmsg_dst_len = r->rtm_dst_len; - rtmsg->rtmsg_src_len = r->rtm_src_len; - rtmsg->rtmsg_flags = RTF_UP; - if (r->rtm_type == RTN_UNREACHABLE) - rtmsg->rtmsg_flags |= RTF_REJECT; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; - if (rta[RTA_GATEWAY-1]) { - if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); - rtmsg->rtmsg_flags |= RTF_GATEWAY; - } - if (rta[RTA_DST-1]) { - if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = rtm->rtm_table; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_flags = RTF_UP; + cfg->fc_protocol = rtm->rtm_protocol; + + if (rtm->rtm_type == RTN_UNREACHABLE) + cfg->fc_flags |= RTF_REJECT; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + if (tb[RTA_GATEWAY]) { + nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_flags |= RTF_GATEWAY; } - if (rta[RTA_SRC-1]) { - if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); + + if (tb[RTA_DST]) { + int plen = (rtm->rtm_dst_len + 7) >> 3; + + if (nla_len(tb[RTA_DST]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); } - if (rta[RTA_OIF-1]) { - if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) - return -EINVAL; - memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + + if (tb[RTA_SRC]) { + int plen = (rtm->rtm_src_len + 7) >> 3; + + if (nla_len(tb[RTA_SRC]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); } - if (rta[RTA_PRIORITY-1]) { - if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + + if (tb[RTA_OIF]) + cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); + + if (tb[RTA_PRIORITY]) + cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); + + if (tb[RTA_METRICS]) { + cfg->fc_mx = nla_data(tb[RTA_METRICS]); + cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); } - return 0; + + if (tb[RTA_TABLE]) + cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + + err = 0; +errout: + return err; } int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return ip6_route_del(&cfg); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); -} + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; -struct rt6_rtnl_dump_arg -{ - struct sk_buff *skb; - struct netlink_callback *cb; -}; + return ip6_route_add(&cfg); +} static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, @@ -1728,9 +1971,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, int prefix, unsigned int flags) { struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct nlmsghdr *nlh; struct rta_cacheinfo ci; + u32 table; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -1739,13 +1982,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } } - nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + if (nlh == NULL) + return -ENOBUFS; + + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; - rtm->rtm_table = RT_TABLE_MAIN; + if (rt->rt6i_table) + table = rt->rt6i_table->tb6_id; + else + table = RT6_TABLE_UNSPEC; + rtm->rtm_table = table; + NLA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) @@ -1766,31 +2017,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - RTA_PUT(skb, RTA_DST, 16, dst); + NLA_PUT(skb, RTA_DST, 16, dst); rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); + NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); #ifdef CONFIG_IPV6_SUBTREES if (src) { - RTA_PUT(skb, RTA_SRC, 16, src); + NLA_PUT(skb, RTA_SRC, 16, src); rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len) - RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); + NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif if (iif) - RTA_PUT(skb, RTA_IIF, 4, &iif); + NLA_PUT_U32(skb, RTA_IIF, iif); else if (dst) { struct in6_addr saddr_buf; if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) - RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } + if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + if (rt->u.dst.neighbour) - RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); - RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); + NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); + + NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); if (rt->rt6i_expires) ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); @@ -1802,23 +2057,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, ci.rta_id = 0; ci.rta_ts = 0; ci.rta_tsage = 0; - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -static int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct rt6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; int prefix; - if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) { - struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh); + if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { + struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; } else prefix = 0; @@ -1828,189 +2081,108 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, NLM_F_MULTI); } -static int fib6_dump_node(struct fib6_walker_t *w) +int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - int res; + struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; + struct sk_buff *skb; + struct rtmsg *rtm; + struct flowi fl; + int err, iif = 0; - for (rt = w->leaf; rt; rt = rt->u.next) { - res = rt6_dump_route(rt, w->args); - if (res < 0) { - /* Frame is full, suspend walking */ - w->leaf = rt; - return 1; - } - BUG_TRAP(res!=0); - } - w->leaf = NULL; - return 0; -} - -static void fib6_dump_end(struct netlink_callback *cb) -{ - struct fib6_walker_t *w = (void*)cb->args[0]; - - if (w) { - cb->args[0] = 0; - fib6_walker_unlink(w); - kfree(w); - } - cb->done = (void*)cb->args[1]; - cb->args[1] = 0; -} - -static int fib6_dump_done(struct netlink_callback *cb) -{ - fib6_dump_end(cb); - return cb->done ? cb->done(cb) : 0; -} - -int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct rt6_rtnl_dump_arg arg; - struct fib6_walker_t *w; - int res; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; - arg.skb = skb; - arg.cb = cb; + err = -EINVAL; + memset(&fl, 0, sizeof(fl)); - w = (void*)cb->args[0]; - if (w == NULL) { - /* New dump: - * - * 1. hook callback destructor. - */ - cb->args[1] = (long)cb->done; - cb->done = fib6_dump_done; + if (tb[RTA_SRC]) { + if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) + goto errout; - /* - * 2. allocate and initialize walker. - */ - w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) - return -ENOMEM; - RT6_TRACE("dump<%p", w); - w->root = &ip6_routing_table; - w->func = fib6_dump_node; - w->args = &arg; - cb->args[0] = (long)w; - read_lock_bh(&rt6_lock); - res = fib6_walk(w); - read_unlock_bh(&rt6_lock); - } else { - w->args = &arg; - read_lock_bh(&rt6_lock); - res = fib6_walk_continue(w); - read_unlock_bh(&rt6_lock); + ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); } -#if RT6_DEBUG >= 3 - if (res <= 0 && skb->len == 0) - RT6_TRACE("%p>dump end\n", w); -#endif - res = res < 0 ? res : skb->len; - /* res < 0 is an error. (really, impossible) - res == 0 means that dump is complete, but skb still can contain data. - res > 0 dump is not complete, but frame is full. - */ - /* Destroy walker, if dump of this table is complete. */ - if (res <= 0) - fib6_dump_end(cb); - return res; -} - -int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) -{ - struct rtattr **rta = arg; - int iif = 0; - int err = -ENOBUFS; - struct sk_buff *skb; - struct flowi fl; - struct rt6_info *rt; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto out; + if (tb[RTA_DST]) { + if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) + goto errout; - /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb->mac.raw = skb->data; - skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); + ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + } - memset(&fl, 0, sizeof(fl)); - if (rta[RTA_SRC-1]) - ipv6_addr_copy(&fl.fl6_src, - (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); - if (rta[RTA_DST-1]) - ipv6_addr_copy(&fl.fl6_dst, - (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); + if (tb[RTA_IIF]) + iif = nla_get_u32(tb[RTA_IIF]); - if (rta[RTA_IIF-1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_OIF]) + fl.oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; dev = __dev_get_by_index(iif); if (!dev) { err = -ENODEV; - goto out_free; + goto errout; } } - fl.oif = 0; - if (rta[RTA_OIF-1]) - memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } - rt = (struct rt6_info*)ip6_route_output(NULL, &fl); + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. + */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); + rt = (struct rt6_info*) ip6_route_output(NULL, &fl); skb->dst = &rt->u.dst; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt6_fill_node(skb, rt, - &fl.fl6_dst, &fl.fl6_src, - iif, + err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0); if (err < 0) { - err = -EMSGSIZE; - goto out_free; + kfree_skb(skb); + goto errout; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +errout: return err; -out_free: - kfree_skb(skb); - goto out; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, - struct netlink_skb_parms *req) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - u32 pid = current->pid; - u32 seq = 0; - - if (req) - pid = req->pid; - if (nlh) - seq = nlh->nlmsg_seq; - - skb = alloc_skb(size, gfp_any()); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); - return; + u32 pid = 0, seq = 0; + struct nlmsghdr *nlh = NULL; + int payload = sizeof(struct rtmsg) + 256; + int err = -ENOBUFS; + + if (info) { + pid = info->pid; + nlh = info->nlh; + if (nlh) + seq = nlh->nlmsg_seq; } - if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { + + skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); + if (skb == NULL) + goto errout; + + err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); + + err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); } /* @@ -2086,16 +2258,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) { - struct rt6_proc_arg arg; - arg.buffer = buffer; - arg.offset = offset; - arg.length = length; - arg.skip = 0; - arg.len = 0; + struct rt6_proc_arg arg = { + .buffer = buffer, + .offset = offset, + .length = length, + }; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_info_route, 0, &arg); *start = buffer; if (offset) @@ -2250,13 +2419,9 @@ void __init ip6_route_init(void) { struct proc_dir_entry *p; - ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", - sizeof(struct rt6_info), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!ip6_dst_ops.kmem_cachep) - panic("cannot create ip6_dst_cache"); - + ip6_dst_ops.kmem_cachep = + kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); fib6_init(); #ifdef CONFIG_PROC_FS p = proc_net_create("ipv6_route", 0, rt6_proc_info); @@ -2268,10 +2433,16 @@ void __init ip6_route_init(void) #ifdef CONFIG_XFRM xfrm6_init(); #endif +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_init(); +#endif } void ip6_route_cleanup(void) { +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_cleanup(); +#endif #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); proc_net_remove("rt6_stats"); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6578c3080f47..836eecd7e62b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -18,7 +18,6 @@ * Nate Thompson <nate@thebog.net>: 6to4 support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -381,7 +380,6 @@ static int ipip6_rcv(struct sk_buff *skb) secpath_reset(skb); skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 8eff9fa1e983..7a4639db1346 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <net/ndisc.h> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a50eb306e9e2..3b6575478fcc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -26,7 +26,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -252,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } + security_sk_classify_flow(sk, &fl); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -270,9 +271,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + sk->sk_gso_type = SKB_GSO_TCPV6; + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -376,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -429,7 +430,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -470,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -544,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -554,6 +555,24 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +static int tcp_v6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + ipv6h = skb->nh.ipv6h; + th = skb->h.th; + + th->check = 0; + th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_TCP, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} static void tcp_v6_send_reset(struct sk_buff *skb) { @@ -610,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -676,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -805,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; + security_inet_conn_request(sk, skb, req); + if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -815,7 +838,6 @@ drop: if (req) reqsk_free(req); - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; /* don't send reset */ } @@ -909,6 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; @@ -930,9 +953,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); - newsk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + newsk->sk_gso_type = SKB_GSO_TCPV6; + __ip6_dst_store(newsk, dst, NULL, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1011,7 +1033,7 @@ out: static int tcp_v6_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1053,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* @@ -1210,12 +1232,12 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; - bh_lock_sock(sk); + bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { #ifdef CONFIG_NET_DMA @@ -1469,7 +1491,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, + tp->write_seq-tp->snd_una, + (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -1605,6 +1628,8 @@ struct proto tcpv6_prot = { static struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d3432a70f3a..9662561701d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -23,7 +23,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -62,81 +61,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - */ -static int udp_v6_get_port(struct sock *sk, unsigned short snum) +static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { - struct sock *sk2; - struct hlist_node *node; - - write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - - if (udp_port_rover > sysctl_local_port_range[1] || - udp_port_rover < sysctl_local_port_range[0]) - udp_port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = udp_port_rover; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - int size; - struct hlist_head *list; - - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - goto gotit; - } - size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; - } - result = best; - for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) - break; - } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; -gotit: - udp_port_rover = snum = result; - } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - if (inet_sk(sk2)->num == snum && - sk2 != sk && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!sk2->sk_reuse || !sk->sk_reuse) && - ipv6_rcv_saddr_equal(sk, sk2)) - goto fail; - } - } - - inet_sk(sk)->num = snum; - if (sk_unhashed(sk)) { - sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); - sock_prot_inc_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); - return 0; - -fail: - write_unlock_bh(&udp_hash_lock); - return 1; + return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } static void udp_v6_hash(struct sock *sk) @@ -346,6 +273,8 @@ out: static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { + int rc; + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { kfree_skb(skb); return -1; @@ -357,7 +286,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return 0; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP6_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return 0; @@ -476,7 +408,7 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; } - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -783,7 +715,9 @@ do_udp_sendmsg: connected = 0; } - err = ip6_dst_lookup(sk, &dst, fl); + security_sk_classify_flow(sk, fl); + + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; if (final_p) @@ -841,7 +775,12 @@ do_append_data: if (connected) { ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); } else { dst_release(dst); } @@ -856,6 +795,16 @@ out: UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff910..5c8b7a568800 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,10 +16,10 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) { int err; - u32 seq; + __be32 seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { /* XXX */ + if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ decaps = 1; break; } @@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } + +int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto) +{ + struct xfrm_state *x = NULL; + int wildcard = 0; + struct in6_addr any; + xfrm_address_t *xany; + struct xfrm_state *xfrm_vec_one = NULL; + int nh = 0; + int i = 0; + + ipv6_addr_set(&any, 0, 0, 0, 0); + xany = (xfrm_address_t *)&any; + + for (i = 0; i < 3; i++) { + xfrm_address_t *dst, *src; + switch (i) { + case 0: + dst = daddr; + src = saddr; + break; + case 1: + /* lookup state with wild-card source address */ + wildcard = 1; + dst = daddr; + src = xany; + break; + case 2: + default: + /* lookup state with wild-card addresses */ + wildcard = 1; /* XXX */ + dst = xany; + src = xany; + break; + } + + x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + if (!x) + continue; + + spin_lock(&x->lock); + + if (wildcard) { + if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + if (xfrm_state_check_expire(x)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + nh = x->type->input(x, skb); + if (nh <= 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_vec_one = x; + break; + } + + if (!xfrm_vec_one) + goto drop; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + sp = secpath_dup(skb->sp); + if (!sp) + goto drop; + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len > XFRM_MAX_DEPTH) + goto drop; + + skb->sp->xvec[skb->sp->len] = xfrm_vec_one; + skb->sp->len ++; + + return 1; +drop: + if (xfrm_vec_one) + xfrm_state_put(xfrm_vec_one); + return -1; +} diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c new file mode 100644 index 000000000000..6031c16d46ca --- /dev/null +++ b/net/ipv6/xfrm6_mode_ro.c @@ -0,0 +1,93 @@ +/* + * xfrm6_mode_ro.c - Route optimization mode for IPv6. + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/ipv6.h> +#include <net/xfrm.h> + +/* Add route optimization header space. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the route optimization header. + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + skb->nh.raw = prevhdr - x->props.header_len; + skb->h.raw = skb->data + hdr_len; + memmove(skb->data, iph, hdr_len); + return 0; +} + +/* + * Do nothing about routing optimization header unlike IPsec. + */ +static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) +{ + return 0; +} + +static struct xfrm_mode xfrm6_ro_mode = { + .input = xfrm6_ro_input, + .output = xfrm6_ro_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_ROUTEOPTIMIZATION, +}; + +static int __init xfrm6_ro_init(void) +{ + return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); +} + +static void __exit xfrm6_ro_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_ro_init); +module_exit(xfrm6_ro_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 711d713e36d8..3a4b39b12bad 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -25,9 +25,8 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_transport_output(struct sk_buff *skb) +static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; @@ -35,7 +34,7 @@ static int xfrm6_transport_output(struct sk_buff *skb) skb_push(skb, x->props.header_len); iph = skb->nh.ipv6h; - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); skb->nh.raw = prevhdr - x->props.header_len; skb->h.raw = skb->data + hdr_len; memmove(skb->data, iph, hdr_len); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 8af79be2edca..5e7d8a7d6414 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_tunnel_output(struct sk_buff *skb) +static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct ipv6hdr *iph, *top_iph; int dsfield; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 48fccb1eca08..c260ea104c52 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,12 @@ #include <net/ipv6.h> #include <net/xfrm.h> +int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, + u8 **prevhdr) +{ + return ip6_find_1stfragopt(skb, prevhdr); +} + static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -41,13 +47,13 @@ static int xfrm6_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -59,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; @@ -69,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; + if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) + x->lastused = (u64)xtime.tv_sec; spin_unlock_bh(&x->lock); @@ -80,7 +88,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; @@ -122,10 +130,10 @@ static int xfrm6_output_finish(struct sk_buff *skb) { struct sk_buff *segs; - if (!skb_shinfo(skb)->gso_size) + if (!skb_is_gso(skb)) return xfrm6_output_finish2(skb); - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (unlikely(IS_ERR(segs))) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ee715f2691e9..6a252e2134d1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -12,13 +12,15 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#ifdef CONFIG_IPV6_MIP6 +#include <net/mip6.h> +#endif static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; @@ -32,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return err; } +static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)&daddr->a6, + }, + }, + }; + + if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -51,7 +73,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6)) { + xfrm_bundle_ok(xdst, fl, AF_INET6, + (xdst->u.rt6.rt6i_dst.plen != 128 || + xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); break; } @@ -60,6 +84,40 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } +static inline struct in6_addr* +__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->remote_addr) ? + (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->id.daddr; +} + +static inline struct in6_addr* +__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->local_addr) ? + (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->props.saddr; +} + +static inline void +__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen += x->props.header_len; + else + *len += x->props.header_len; +} + +static inline void +__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen -= x->props.header_len; + else + *len -= x->props.header_len; +} + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -84,6 +142,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int int i; int err = 0; int header_len = 0; + int nfheader_len = 0; int trailer_len = 0; dst = dst_prev = NULL; @@ -110,17 +169,18 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { - remote = (struct in6_addr*)&xfrm[i]->id.daddr; - local = (struct in6_addr*)&xfrm[i]->props.saddr; + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { + remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); + local = __xfrm6_bundle_addr_local(xfrm[i], local); tunnel = 1; } - header_len += xfrm[i]->props.header_len; + __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; if (tunnel) { @@ -155,6 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = nfheader_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -173,7 +234,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int x->u.rt6.rt6i_src = rt0->rt6i_src; x->u.rt6.rt6i_idev = rt0->rt6i_idev; in6_dev_hold(rt0->rt6i_idev); - header_len -= x->u.dst.xfrm->props.header_len; + __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); trailer_len -= x->u.dst.xfrm->props.trailer_len; } @@ -233,6 +294,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { + struct ip6_mh *mh; + mh = (struct ip6_mh *)exthdr; + + fl->fl_mh_type = mh->ip6mh_type; + } + fl->proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? */ case IPPROTO_AH: case IPPROTO_ESP: @@ -309,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, + .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6de..9ddaa9d41539 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -29,9 +29,9 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = ~0; + x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = ~0; + x->sel.sport_mask = htons(0xffff); x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; @@ -42,102 +42,135 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)daddr, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET6)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, - (struct in6_addr *)&x->props.saddr); - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET6; } -static struct xfrm_state * -__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) +static int +__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { - unsigned h = __xfrm6_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET6 && - spi == x->id.spi && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; + int i; + int j = 0; + + /* Rule 1: select IPsec transport except AH */ + for (i = 0; i < n; i++) { + if (src[i]->props.mode == XFRM_MODE_TRANSPORT && + src[i]->id.proto != IPPROTO_AH) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; + + /* Rule 2: select MIPv6 RO or inbound trigger */ +#ifdef CONFIG_IPV6_MIP6 + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || + src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; +#endif + + /* Rule 3: select IPsec transport AH */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->props.mode == XFRM_MODE_TRANSPORT && + src[i]->id.proto == IPPROTO_AH) { + dst[j++] = src[i]; + src[i] = NULL; } } - return NULL; + if (j == n) + goto end; + + /* Rule 4: select IPsec tunnel */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->props.mode == XFRM_MODE_TUNNEL) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (likely(j == n)) + goto end; + + /* Final rule */ + for (i = 0; i < n; i++) { + if (src[i]) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + + end: + return 0; } -static struct xfrm_state * -__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) +static int +__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) { - struct xfrm_state *x, *x0; - unsigned h = __xfrm6_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - mode == x->props.mode && - proto == x->id.proto && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } + int i; + int j = 0; + + /* Rule 1: select IPsec transport */ + for (i = 0; i < n; i++) { + if (src[i]->mode == XFRM_MODE_TRANSPORT) { + dst[j++] = src[i]; + src[i] = NULL; + } } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, - (struct in6_addr *)saddr); - x0->sel.prefixlen_d = 128; - x0->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, - (struct in6_addr *)saddr); - x0->km.state = XFRM_STATE_ACQ; - ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, - (struct in6_addr *)daddr); - x0->id.proto = proto; - x0->props.family = AF_INET6; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); - wake_up(&km_waitq); + if (j == n) + goto end; + + /* Rule 2: select MIPv6 RO or inbound trigger */ +#ifdef CONFIG_IPV6_MIP6 + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || + src[i]->mode == XFRM_MODE_IN_TRIGGER)) { + dst[j++] = src[i]; + src[i] = NULL; + } } - if (x0) - xfrm_state_hold(x0); - return x0; + if (j == n) + goto end; +#endif + + /* Rule 3: select IPsec tunnel */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->mode == XFRM_MODE_TUNNEL) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (likely(j == n)) + goto end; + + /* Final rule */ + for (i = 0; i < n; i++) { + if (src[i]) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + + end: + return 0; } static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, - .state_lookup = __xfrm6_state_lookup, - .find_acq = __xfrm6_find_acq, + .tmpl_sort = __xfrm6_tmpl_sort, + .state_sort = __xfrm6_state_sort, }; void __init xfrm6_state_init(void) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index d37768e5064f..7af227bb1551 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -21,7 +21,6 @@ * Based on net/ipv4/xfrm4_tunnel.c * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/xfrm.h> #include <linux/list.h> @@ -32,27 +31,6 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG -# define X6TDEBUG 3 -#else -# define X6TDEBUG 1 -#endif - -#define X6TPRINTK(fmt, args...) printk(fmt, ## args) -#define X6TNOPRINTK(fmt, args...) do { ; } while(0) - -#if X6TDEBUG >= 1 -# define X6TPRINTK1 X6TPRINTK -#else -# define X6TPRINTK1 X6TNOPRINTK -#endif - -#if X6TDEBUG >= 3 -# define X6TPRINTK3 X6TPRINTK -#else -# define X6TPRINTK3 X6TNOPRINTK -#endif - /* * xfrm_tunnel_spi things are for allocating unique id ("spi") * per xfrm_address_t. @@ -63,15 +41,8 @@ struct xfrm6_tunnel_spi { xfrm_address_t addr; u32 spi; atomic_t refcnt; -#ifdef XFRM6_TUNNEL_SPI_MAGIC - u32 magic; -#endif }; -#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG -# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef -#endif - static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock); static u32 xfrm6_tunnel_spi; @@ -87,43 +58,15 @@ static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; -#ifdef XFRM6_TUNNEL_SPI_MAGIC -static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi, - const char *name) -{ - if (unlikely(x6spi->magic != XFRM6_TUNNEL_SPI_MAGIC)) { - X6TPRINTK3(KERN_DEBUG "%s(): x6spi object " - "at %p has corrupted magic %08x " - "(should be %08x)\n", - name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC); - return -1; - } - return 0; -} -#else -static int inline x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi, - const char *name) -{ - return 0; -} -#endif - -#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__) - - static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) { unsigned h; - X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr); - h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]; h ^= h >> 16; h ^= h >> 8; h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1; - X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h); - return h; } @@ -137,19 +80,13 @@ static int xfrm6_tunnel_spi_init(void) { int i; - X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); - xfrm6_tunnel_spi = 0; xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!xfrm6_tunnel_spi_kmem) { - X6TPRINTK1(KERN_ERR - "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n", - __FUNCTION__); + if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; - } for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]); @@ -162,22 +99,16 @@ static void xfrm6_tunnel_spi_fini(void) { int i; - X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) { if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i])) - goto err; + return; } for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) { if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) - goto err; + return; } kmem_cache_destroy(xfrm6_tunnel_spi_kmem); xfrm6_tunnel_spi_kmem = NULL; - return; -err: - X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__); - return; } static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) @@ -185,19 +116,13 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; - X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr); - hlist_for_each_entry(x6spi, pos, &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { - if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { - X6SPI_CHECK_MAGIC(x6spi); - X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi); + if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) return x6spi; - } } - X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__); return NULL; } @@ -206,8 +131,6 @@ u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr); - read_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(saddr); spi = x6spi ? x6spi->spi : 0; @@ -224,8 +147,6 @@ static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) struct hlist_node *pos; unsigned index; - X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr); - if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX) xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN; @@ -259,18 +180,10 @@ try_next_2:; spi = 0; goto out; alloc_spi: - X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for " NIP6_FMT "\n", - __FUNCTION__, - NIP6(*(struct in6_addr *)saddr)); x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC); - if (!x6spi) { - X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n", - __FUNCTION__); + if (!x6spi) goto out; - } -#ifdef XFRM6_TUNNEL_SPI_MAGIC - x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC; -#endif + memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); @@ -279,9 +192,7 @@ alloc_spi: index = xfrm6_tunnel_spi_hash_byaddr(saddr); hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); - X6SPI_CHECK_MAGIC(x6spi); out: - X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi); return spi; } @@ -290,8 +201,6 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr); - write_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(saddr); if (x6spi) { @@ -301,8 +210,6 @@ u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) spi = __xfrm6_tunnel_alloc_spi(saddr); write_unlock_bh(&xfrm6_tunnel_spi_lock); - X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi); - return spi; } @@ -313,8 +220,6 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; - X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr); - write_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, @@ -322,12 +227,6 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { - X6TPRINTK3(KERN_DEBUG "%s(): x6spi object for " NIP6_FMT - " found at %p\n", - __FUNCTION__, - NIP6(*(struct in6_addr *)saddr), - x6spi); - X6SPI_CHECK_MAGIC(x6spi); if (atomic_dec_and_test(&x6spi->refcnt)) { hlist_del(&x6spi->list_byaddr); hlist_del(&x6spi->list_byspi); @@ -359,7 +258,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_rcv(struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; - u32 spi; + __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, spi); @@ -378,20 +277,14 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case ICMPV6_ADDR_UNREACH: case ICMPV6_PORT_UNREACH: default: - X6TPRINTK3(KERN_DEBUG - "xfrm6_tunnel: Destination Unreach.\n"); break; } break; case ICMPV6_PKT_TOOBIG: - X6TPRINTK3(KERN_DEBUG - "xfrm6_tunnel: Packet Too Big.\n"); break; case ICMPV6_TIME_EXCEED: switch (code) { case ICMPV6_EXC_HOPLIMIT: - X6TPRINTK3(KERN_DEBUG - "xfrm6_tunnel: Too small Hoplimit.\n"); break; case ICMPV6_EXC_FRAGTIME: default: @@ -414,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) @@ -448,22 +341,14 @@ static struct xfrm6_tunnel xfrm6_tunnel_handler = { static int __init xfrm6_tunnel_init(void) { - X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); - - if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) { - X6TPRINTK1(KERN_ERR - "xfrm6_tunnel init: can't add xfrm type\n"); + if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) return -EAGAIN; - } + if (xfrm6_tunnel_register(&xfrm6_tunnel_handler)) { - X6TPRINTK1(KERN_ERR - "xfrm6_tunnel init(): can't add handler\n"); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); return -EAGAIN; } if (xfrm6_tunnel_spi_init() < 0) { - X6TPRINTK1(KERN_ERR - "xfrm6_tunnel init: failed to initialize spi\n"); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); return -EAGAIN; @@ -473,15 +358,9 @@ static int __init xfrm6_tunnel_init(void) static void __exit xfrm6_tunnel_fini(void) { - X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__); - xfrm6_tunnel_spi_fini(); - if (xfrm6_tunnel_deregister(&xfrm6_tunnel_handler)) - X6TPRINTK1(KERN_ERR - "xfrm6_tunnel close: can't remove handler\n"); - if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0) - X6TPRINTK1(KERN_ERR - "xfrm6_tunnel close: can't remove xfrm type\n"); + xfrm6_tunnel_deregister(&xfrm6_tunnel_handler); + xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); } module_init(xfrm6_tunnel_init); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 811d998725bc..bef3f61569f7 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -28,7 +28,6 @@ * See net/ipx/ChangeLog. */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/if_arp.h> @@ -1643,13 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out; - ipx = ipx_hdr(skb); - ipx_pktsize = ntohs(ipx->ipx_pktsize); + if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) + goto drop; + + ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize); /* Too small or invalid header? */ - if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len) + if (ipx_pktsize < sizeof(struct ipxhdr) || + !pskb_may_pull(skb, ipx_pktsize)) goto drop; + ipx = ipx_hdr(skb); if (ipx->ipx_checksum != IPX_NO_CHECKSUM && ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize)) goto drop; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 1f73d9ea434d..4c0c71206e54 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -4,7 +4,6 @@ * Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002 */ -#include <linux/config.h> #include <linux/init.h> #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index bba3431cd9a5..a30dbb1e08fb 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -7,7 +7,6 @@ * See net/ipx/ChangeLog. */ -#include <linux/config.h> #include <linux/list.h> #include <linux/route.h> #include <linux/spinlock.h> diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 510eda96d10a..fa574735c76f 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -6,7 +6,6 @@ * Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001 */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2f37c9f35e27..7e1aea89ef05 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -42,7 +42,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> @@ -133,13 +132,14 @@ static void irda_disconnect_indication(void *instance, void *sap, /* Prevent race conditions with irda_release() and irda_shutdown() */ if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) { + lock_sock(sk); sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); - /* Uh-oh... Should use sock_orphan ? */ - sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); + release_sock(sk); /* Close our TSAP. * If we leave it open, IrLMP put it back into the list of @@ -309,7 +309,8 @@ static void irda_connect_response(struct irda_sock *self) IRDA_ASSERT(self != NULL, return;); - skb = dev_alloc_skb(64); + skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (skb == NULL) { IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", __FUNCTION__); @@ -1213,6 +1214,7 @@ static int irda_release(struct socket *sock) if (sk == NULL) return 0; + lock_sock(sk); sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); @@ -1222,6 +1224,7 @@ static int irda_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; + release_sock(sk); /* Purge queues (see sock_init_data()) */ skb_queue_purge(&sk->sk_receive_queue); @@ -1354,6 +1357,7 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __FUNCTION__); IRDA_ASSERT(self != NULL, return -1;); + IRDA_ASSERT(!sock_error(sk), return -1;); skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); @@ -1406,6 +1410,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(3, "%s()\n", __FUNCTION__); IRDA_ASSERT(self != NULL, return -1;); + IRDA_ASSERT(!sock_error(sk), return -1;); if (sock->flags & __SO_ACCEPTCON) return(-EINVAL); diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 286881978858..ad6b6af3dd97 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/proc_fs.h> @@ -116,12 +115,10 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line) IRDA_ASSERT(ircomm != NULL, return NULL;); - self = kmalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); if (self == NULL) return NULL; - memset(self, 0, sizeof(struct ircomm_cb)); - self->notify = *notify; self->magic = IRCOMM_MAGIC; diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index d9097207aed3..c8e0d89ee11f 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -81,7 +81,7 @@ static int ircomm_lmp_connect_response(struct ircomm_cb *self, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -115,7 +115,7 @@ static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); if (!userdata) { - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 6009bab05091..a39f5735a90b 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -121,7 +121,7 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) skb = self->ctrl_skb; if (!skb) { - skb = dev_alloc_skb(256); + skb = alloc_skb(256, GFP_ATOMIC); if (!skb) { spin_unlock_irqrestore(&self->spinlock, flags); return -ENOMEM; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 6f20b4206e08..3bcdb467efc5 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -30,7 +30,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> @@ -124,7 +123,6 @@ static int __init ircomm_tty_init(void) driver->owner = THIS_MODULE; driver->driver_name = "ircomm"; driver->name = "ircomm"; - driver->devfs_name = "ircomm"; driver->major = IRCOMM_TTY_MAJOR; driver->minor_start = IRCOMM_TTY_MINOR; driver->type = TTY_DRIVER_TYPE_SERIAL; @@ -381,12 +379,11 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) self = hashbin_lock_find(ircomm_tty, line, NULL); if (!self) { /* No, so make new instance */ - self = kmalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL); + self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL); if (self == NULL) { IRDA_ERROR("%s(), kmalloc failed!\n", __FUNCTION__); return -ENOMEM; } - memset(self, 0, sizeof(struct ircomm_tty_cb)); self->magic = IRCOMM_TTY_MAGIC; self->flow = FLOW_STOP; @@ -761,8 +758,9 @@ static int ircomm_tty_write(struct tty_struct *tty, } } else { /* Prepare a full sized frame */ - skb = dev_alloc_skb(self->max_data_size+ - self->max_header_size); + skb = alloc_skb(self->max_data_size+ + self->max_header_size, + GFP_ATOMIC); if (!skb) { spin_unlock_irqrestore(&self->spinlock, flags); return -ENOBUFS; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index e3debbdb67f5..7e7a31798d8d 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/string.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> @@ -402,12 +401,10 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type) } /* Allocate dongle info for this instance */ - dongle = kmalloc(sizeof(dongle_t), GFP_KERNEL); + dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL); if (!dongle) goto out; - memset(dongle, 0, sizeof(dongle_t)); - /* Bind the registration info to this particular instance */ dongle->issue = reg; dongle->dev = dev; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 2d2e2b1919f4..415cf4eec23b 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/skbuff.h> @@ -346,10 +345,11 @@ static void iriap_disconnect_request(struct iriap_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (tx_skb == NULL) { - IRDA_DEBUG(0, "%s(), Could not allocate an sk_buff of length %d\n", - __FUNCTION__, 64); + IRDA_DEBUG(0, + "%s(), Could not allocate an sk_buff of length %d\n", + __FUNCTION__, LMP_MAX_HEADER); return; } @@ -397,7 +397,7 @@ int iriap_getvaluebyclass_request(struct iriap_cb *self, attr_len = strlen(attr); /* Up to IAS_MAX_ATTRIBNAME = 60 */ skb_len = self->max_header_size+2+name_len+1+attr_len+4; - tx_skb = dev_alloc_skb(skb_len); + tx_skb = alloc_skb(skb_len, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -563,7 +563,8 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self, * value. We add 32 bytes because of the 6 bytes for the frame and * max 5 bytes for the value coding. */ - tx_skb = dev_alloc_skb(value->len + self->max_header_size + 32); + tx_skb = alloc_skb(value->len + self->max_header_size + 32, + GFP_ATOMIC); if (!tx_skb) return; @@ -701,7 +702,7 @@ void iriap_send_ack(struct iriap_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC); if (!tx_skb) return; diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c index a73607450de1..99b18dc7a0b7 100644 --- a/net/irda/iriap_event.c +++ b/net/irda/iriap_event.c @@ -365,7 +365,7 @@ static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event, switch (event) { case IAP_LM_CONNECT_INDICATION: - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (tx_skb == NULL) { IRDA_WARNING("%s: unable to malloc!\n", __FUNCTION__); return; diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 82e665c79991..a154b1d71c0f 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -82,13 +82,12 @@ struct ias_object *irias_new_object( char *name, int id) IRDA_DEBUG( 4, "%s()\n", __FUNCTION__); - obj = kmalloc(sizeof(struct ias_object), GFP_ATOMIC); + obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC); if (obj == NULL) { IRDA_WARNING("%s(), Unable to allocate object!\n", __FUNCTION__); return NULL; } - memset(obj, 0, sizeof( struct ias_object)); obj->magic = IAS_OBJECT_MAGIC; obj->name = strndup(name, IAS_MAX_CLASSNAME); @@ -346,13 +345,12 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); IRDA_ASSERT(name != NULL, return;); - attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC); + attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC); if (attrib == NULL) { IRDA_WARNING("%s: Unable to allocate attribute!\n", __FUNCTION__); return; } - memset(attrib, 0, sizeof( struct ias_attrib)); attrib->magic = IAS_ATTRIB_MAGIC; attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); @@ -382,13 +380,12 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, IRDA_ASSERT(name != NULL, return;); IRDA_ASSERT(octets != NULL, return;); - attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC); + attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC); if (attrib == NULL) { IRDA_WARNING("%s: Unable to allocate attribute!\n", __FUNCTION__); return; } - memset(attrib, 0, sizeof( struct ias_attrib)); attrib->magic = IAS_ATTRIB_MAGIC; attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); @@ -416,13 +413,12 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, IRDA_ASSERT(name != NULL, return;); IRDA_ASSERT(value != NULL, return;); - attrib = kmalloc(sizeof( struct ias_attrib), GFP_ATOMIC); + attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC); if (attrib == NULL) { IRDA_WARNING("%s: Unable to allocate attribute!\n", __FUNCTION__); return; } - memset(attrib, 0, sizeof( struct ias_attrib)); attrib->magic = IAS_ATTRIB_MAGIC; attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); @@ -443,12 +439,11 @@ struct ias_value *irias_new_integer_value(int integer) { struct ias_value *value; - value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC); + value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); if (value == NULL) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); return NULL; } - memset(value, 0, sizeof(struct ias_value)); value->type = IAS_INTEGER; value->len = 4; @@ -469,12 +464,11 @@ struct ias_value *irias_new_string_value(char *string) { struct ias_value *value; - value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC); + value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); if (value == NULL) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); return NULL; } - memset( value, 0, sizeof( struct ias_value)); value->type = IAS_STRING; value->charset = CS_ASCII; @@ -495,12 +489,11 @@ struct ias_value *irias_new_octseq_value(__u8 *octseq , int len) { struct ias_value *value; - value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC); + value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); if (value == NULL) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); return NULL; } - memset(value, 0, sizeof(struct ias_value)); value->type = IAS_OCT_SEQ; /* Check length */ @@ -522,12 +515,11 @@ struct ias_value *irias_new_missing_value(void) { struct ias_value *value; - value = kmalloc(sizeof(struct ias_value), GFP_ATOMIC); + value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); if (value == NULL) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); return NULL; } - memset(value, 0, sizeof(struct ias_value)); value->type = IAS_MISSING; value->len = 0; diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index f8e6cb0db04b..95cf1234ea17 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -173,13 +173,14 @@ void irlan_client_discovery_indication(discinfo_t *discovery, rcu_read_lock(); self = irlan_get_any(); if (self) { - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); + IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;); IRDA_DEBUG(1, "%s(), Found instance (%08x)!\n", __FUNCTION__ , daddr); irlan_client_wakeup(self, saddr, daddr); } +IRDA_ASSERT_LABEL(out:) rcu_read_unlock(); } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 657d12210578..9b962f247714 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -23,7 +23,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> @@ -637,7 +636,8 @@ void irlan_get_provider_info(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(64); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER, + GFP_ATOMIC); if (!skb) return; @@ -669,7 +669,10 @@ void irlan_open_data_channel(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(64); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") + + IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"), + GFP_ATOMIC); if (!skb) return; @@ -705,7 +708,9 @@ void irlan_close_data_channel(struct irlan_cb *self) if (self->client.tsap_ctrl == NULL) return; - skb = dev_alloc_skb(64); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"), + GFP_ATOMIC); if (!skb) return; @@ -716,7 +721,7 @@ void irlan_close_data_channel(struct irlan_cb *self) /* Build frame */ frame[0] = CMD_CLOSE_DATA_CHAN; - frame[1] = 0x01; /* Two parameters */ + frame[1] = 0x01; /* One parameter */ irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); @@ -740,7 +745,11 @@ static void irlan_open_unicast_addr(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(128); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), + GFP_ATOMIC); if (!skb) return; @@ -778,7 +787,12 @@ void irlan_set_broadcast_filter(struct irlan_cb *self, int status) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(128); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") + + /* We may waste one byte here...*/ + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), + GFP_ATOMIC); if (!skb) return; @@ -817,7 +831,12 @@ void irlan_set_multicast_filter(struct irlan_cb *self, int status) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(128); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + + /* We may waste one byte here...*/ + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"), + GFP_ATOMIC); if (!skb) return; @@ -857,7 +876,12 @@ static void irlan_get_unicast_addr(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(128); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION", + "DYNAMIC"), + GFP_ATOMIC); if (!skb) return; @@ -892,7 +916,10 @@ void irlan_get_media_char(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(64); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"), + GFP_ATOMIC); + if (!skb) return; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 953e255d2bc8..b0ccc455b747 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -25,7 +25,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/inetdevice.h> diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index 39c202d1c374..58efde919667 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -296,7 +296,14 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command, IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = dev_alloc_skb(128); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + /* Bigger param length comes from CMD_GET_MEDIA_CHAR */ + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BORADCAST") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + + IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"), + GFP_ATOMIC); + if (!skb) return; @@ -354,8 +361,7 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command, } else skb->data[1] = 0x02; /* 2 parameters */ irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data); - irlan_insert_array_param(skb, "RECONNECT_KEY", "LINUX RULES!", - 12); + irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!"); break; case CMD_FILTER_OPERATION: irlan_filter_request(self, skb); diff --git a/net/irda/irlap.c b/net/irda/irlap.c index a16528657b4c..e7852a07495e 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/skbuff.h> @@ -117,11 +116,10 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, IRDA_DEBUG(4, "%s()\n", __FUNCTION__); /* Initialize the irlap structure. */ - self = kmalloc(sizeof(struct irlap_cb), GFP_KERNEL); + self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL); if (self == NULL) return NULL; - memset(self, 0, sizeof(struct irlap_cb)); self->magic = LAP_MAGIC; /* Make a binding between the layers */ @@ -883,7 +881,7 @@ static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) /* Change speed now, or just piggyback speed on frames */ if (now) { /* Send down empty frame to trigger speed change */ - skb = dev_alloc_skb(0); + skb = alloc_skb(0, GFP_ATOMIC); if (skb) irlap_queue_xmit(self, skb); } @@ -1223,7 +1221,7 @@ static int irlap_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct irlap_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct irlap_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -1239,7 +1237,6 @@ static int irlap_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index a505b5457608..99faff68c399 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -25,7 +25,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/delay.h> diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 3e9a06abbdd0..dba349c832d0 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -117,7 +117,9 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) IRDA_ASSERT(self->magic == LAP_MAGIC, return;); /* Allocate frame */ - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(sizeof(struct snrm_frame) + + IRLAP_NEGOCIATION_PARAMS_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -136,7 +138,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) * If we are establishing a connection then insert QoS paramerters */ if (qos) { - skb_put(tx_skb, 9); /* 21 left */ + skb_put(tx_skb, 9); /* 25 left */ frame->saddr = cpu_to_le32(self->saddr); frame->daddr = cpu_to_le32(self->daddr); @@ -210,7 +212,9 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) IRDA_ASSERT(self->magic == LAP_MAGIC, return;); /* Allocate frame */ - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(sizeof(struct ua_frame) + + IRLAP_NEGOCIATION_PARAMS_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -245,23 +249,23 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) void irlap_send_dm_frame( struct irlap_cb *self) { struct sk_buff *tx_skb = NULL; - __u8 *frame; + struct dm_frame *frame; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - tx_skb = dev_alloc_skb(32); + tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct dm_frame *)skb_put(tx_skb, 2); if (self->state == LAP_NDM) - frame[0] = CBROADCAST; + frame->caddr = CBROADCAST; else - frame[0] = self->caddr; + frame->caddr = self->caddr; - frame[1] = DM_RSP | PF_BIT; + frame->control = DM_RSP | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -275,21 +279,21 @@ void irlap_send_dm_frame( struct irlap_cb *self) void irlap_send_disc_frame(struct irlap_cb *self) { struct sk_buff *tx_skb = NULL; - __u8 *frame; + struct disc_frame *frame; IRDA_DEBUG(3, "%s()\n", __FUNCTION__); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - tx_skb = dev_alloc_skb(16); + tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct disc_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr | CMD_FRAME; - frame[1] = DISC_CMD | PF_BIT; + frame->caddr = self->caddr | CMD_FRAME; + frame->control = DISC_CMD | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -315,7 +319,8 @@ void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s, IRDA_ASSERT(self->magic == LAP_MAGIC, return;); IRDA_ASSERT(discovery != NULL, return;); - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -422,11 +427,10 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self, return; } - if ((discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) { + if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) { IRDA_WARNING("%s: kmalloc failed!\n", __FUNCTION__); return; } - memset(discovery, 0, sizeof(discovery_t)); discovery->data.daddr = info->daddr; discovery->data.saddr = self->saddr; @@ -574,18 +578,18 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self, void irlap_send_rr_frame(struct irlap_cb *self, int command) { struct sk_buff *tx_skb; - __u8 *frame; + struct rr_frame *frame; - tx_skb = dev_alloc_skb(16); + tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct rr_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr; - frame[0] |= (command) ? CMD_FRAME : 0; + frame->caddr = self->caddr; + frame->caddr |= (command) ? CMD_FRAME : 0; - frame[1] = RR | PF_BIT | (self->vr << 5); + frame->control = RR | PF_BIT | (self->vr << 5); irlap_queue_xmit(self, tx_skb); } @@ -599,16 +603,16 @@ void irlap_send_rr_frame(struct irlap_cb *self, int command) void irlap_send_rd_frame(struct irlap_cb *self) { struct sk_buff *tx_skb; - __u8 *frame; + struct rd_frame *frame; - tx_skb = dev_alloc_skb(16); + tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct rd_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr; - frame[1] = RD_RSP | PF_BIT; + frame->caddr = self->caddr; + frame->caddr = RD_RSP | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -1215,7 +1219,7 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, struct test_frame *frame; __u8 *info; - tx_skb = dev_alloc_skb(cmd->len+sizeof(struct test_frame)); + tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC); if (!tx_skb) return; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 57ea160f470b..5073261b9d0c 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> @@ -79,10 +78,9 @@ int __init irlmp_init(void) { IRDA_DEBUG(1, "%s()\n", __FUNCTION__); /* Initialize the irlmp structure. */ - irlmp = kmalloc( sizeof(struct irlmp_cb), GFP_KERNEL); + irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL); if (irlmp == NULL) return -ENOMEM; - memset(irlmp, 0, sizeof(struct irlmp_cb)); irlmp->magic = LMP_MAGIC; @@ -161,12 +159,11 @@ struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid) return NULL; /* Allocate new instance of a LSAP connection */ - self = kmalloc(sizeof(struct lsap_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC); if (self == NULL) { IRDA_ERROR("%s: can't allocate memory\n", __FUNCTION__); return NULL; } - memset(self, 0, sizeof(struct lsap_cb)); self->magic = LMP_LSAP_MAGIC; self->slsap_sel = slsap_sel; @@ -289,12 +286,11 @@ void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify) /* * Allocate new instance of a LSAP connection */ - lap = kmalloc(sizeof(struct lap_cb), GFP_KERNEL); + lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL); if (lap == NULL) { IRDA_ERROR("%s: unable to kmalloc\n", __FUNCTION__); return; } - memset(lap, 0, sizeof(struct lap_cb)); lap->irlap = irlap; lap->magic = LMP_LAP_MAGIC; @@ -396,7 +392,7 @@ int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel, /* Any userdata? */ if (tx_skb == NULL) { - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 26649f6528e6..4c90dd1b4503 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/kernel.h> #include <net/irda/irda.h> diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c index 91cd268172fa..39761a1d18f5 100644 --- a/net/irda/irlmp_frame.c +++ b/net/irda/irlmp_frame.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/skbuff.h> #include <linux/kernel.h> diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 634901dd156f..2869b16e417d 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -31,7 +31,6 @@ * Jean II */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index e4fe1e80029c..80887528e77e 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -244,12 +244,10 @@ #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/proc_fs.h> -#include <linux/devfs_fs_kernel.h> #include <linux/netdevice.h> #include <linux/miscdevice.h> #include <linux/poll.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/ctype.h> /* isspace() */ #include <asm/uaccess.h> #include <linux/init.h> diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index e53bf9e0053e..a1e502ff9070 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -476,11 +476,10 @@ dev_irnet_open(struct inode * inode, #endif /* SECURE_DEVIRNET */ /* Allocate a private structure for this IrNET instance */ - ap = kmalloc(sizeof(*ap), GFP_KERNEL); + ap = kzalloc(sizeof(*ap), GFP_KERNEL); DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); /* initialize the irnet structure */ - memset(ap, 0, sizeof(*ap)); ap->file = file; /* PPP channel setup */ diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 1b1c4193359a..86805c3d8324 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -23,7 +23,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/mm.h> #include <linux/ctype.h> #include <linux/sysctl.h> diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 8aff254cb418..3c2e70b77df1 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/seq_file.h> @@ -86,10 +85,9 @@ static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; */ int __init irttp_init(void) { - irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL); + irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL); if (irttp == NULL) return -ENOMEM; - memset(irttp, 0, sizeof(struct irttp_cb)); irttp->magic = TTP_MAGIC; @@ -307,7 +305,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self, IRDA_DEBUG(2, "%s(), fragmenting ...\n", __FUNCTION__); /* Make new segment */ - frag = dev_alloc_skb(self->max_seg_size+self->max_header_size); + frag = alloc_skb(self->max_seg_size+self->max_header_size, + GFP_ATOMIC); if (!frag) return; @@ -390,12 +389,11 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) return NULL; } - self = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC); + self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC); if (self == NULL) { IRDA_DEBUG(0, "%s(), unable to kmalloc!\n", __FUNCTION__); return NULL; } - memset(self, 0, sizeof(struct tsap_cb)); spin_lock_init(&self->lock); /* Initialise todo timer */ @@ -806,12 +804,12 @@ static inline void irttp_give_credit(struct tsap_cb *self) self->send_credit, self->avail_credit, self->remote_credit); /* Give credit to peer */ - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return; /* Reserve space for LMP, and LAP header */ - skb_reserve(tx_skb, self->max_header_size); + skb_reserve(tx_skb, LMP_MAX_HEADER); /* * Since we can transmit and receive frames concurrently, @@ -1095,7 +1093,8 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -1343,7 +1342,8 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -1542,14 +1542,14 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata, if (!userdata) { struct sk_buff *tx_skb; - tx_skb = dev_alloc_skb(64); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; /* * Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, LMP_MAX_HEADER); userdata = tx_skb; } @@ -1877,7 +1877,7 @@ static int irttp_seq_open(struct inode *inode, struct file *file) int rc = -ENOMEM; struct irttp_iter_state *s; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; @@ -1887,7 +1887,6 @@ static int irttp_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/irda/qos.c b/net/irda/qos.c index ddfb5c502a90..95a69c013ee8 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -30,7 +30,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/irda/timer.c b/net/irda/timer.c index 0e17f976add6..3871a2b911f9 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -25,7 +25,6 @@ ********************************************************************/ #include <asm/system.h> -#include <linux/config.h> #include <linux/delay.h> #include <net/irda/timer.h> diff --git a/net/key/af_key.c b/net/key/af_key.c index d5e2121ea207..ff98e70b0931 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -14,7 +14,6 @@ * Derek Atkins <derek@ihtfp.com> */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> @@ -1732,7 +1731,8 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) + if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, + (void*)&reqid) != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -1766,7 +1766,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1998,7 +1998,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode) + if (t->mode == XFRM_MODE_TUNNEL) req_size += 2*socklen; else size -= 2*socklen; @@ -2014,7 +2014,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -2140,7 +2140,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) - xp->selector.sport_mask = ~0; + xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); @@ -2153,7 +2153,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) - xp->selector.dport_mask = ~0; + xp->selector.dport_mask = htons(0xffff); sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; if (sec_ctx != NULL) { @@ -2243,7 +2243,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) - sel.sport_mask = ~0; + sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); @@ -2251,7 +2251,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) - sel.dport_mask = ~0; + sel.dport_mask = htons(0xffff); sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; memset(&tmp, 0, sizeof(struct xfrm_policy)); @@ -2269,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, + &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2331,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (dir >= XFRM_POLICY_MAX) return -EINVAL; - xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, + xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2379,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; - return xfrm_policy_walk(dump_sp, &data); + return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); } static int key_notify_policy_flush(struct km_event *c) @@ -2406,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; - xfrm_policy_flush(); + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; @@ -2668,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) + return 0; + switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); @@ -2676,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: + if (c->data.type != XFRM_POLICY_TYPE_MAIN) + break; return key_notify_policy_flush(c); default: printk("pfkey: Unknown policy event %d\n", c->event); @@ -2709,6 +2716,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct #endif int sockaddr_size; int size; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; + int ctx_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -2724,6 +2734,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) size += count_esp_combs(t); + if ((xfrm_ctx = x->security)) { + ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); + size += sizeof(struct sadb_x_sec_ctx) + ctx_size; + } + skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2819,17 +2834,31 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) dump_esp_combs(skb, t); + /* security context */ + if (xfrm_ctx) { + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx->sadb_x_sec_len = + (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, +static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2870,7 +2899,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = family; + xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2886,8 +2915,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) + sec_ctx->sadb_x_sec_len) { + *dir = -EINVAL; goto out; + } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2897,6 +2928,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index aea6616cea3d..7e6bc41eeb21 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -115,14 +115,12 @@ static struct lapb_cb *lapb_devtostruct(struct net_device *dev) */ static struct lapb_cb *lapb_create_cb(void) { - struct lapb_cb *lapb = kmalloc(sizeof(*lapb), GFP_ATOMIC); + struct lapb_cb *lapb = kzalloc(sizeof(*lapb), GFP_ATOMIC); if (!lapb) goto out; - memset(lapb, 0x00, sizeof(*lapb)); - skb_queue_head_init(&lapb->write_queue); skb_queue_head_init(&lapb->ack_queue); @@ -240,11 +238,13 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) goto out_put; if (lapb->state == LAPB_STATE_0) { - if (((parms->mode & LAPB_EXTENDED) && - (parms->window < 1 || parms->window > 127)) || - (parms->window < 1 || parms->window > 7)) - goto out_put; - + if (parms->mode & LAPB_EXTENDED) { + if (parms->window < 1 || parms->window > 127) + goto out_put; + } else { + if (parms->window < 1 || parms->window > 7) + goto out_put; + } lapb->mode = parms->mode; lapb->window = parms->window; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 75c9b1480801..2652ead96c64 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -20,7 +20,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> @@ -785,24 +784,20 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; - if (used + offset < skb->len) - continue; - if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } + + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + + /* Partial read */ + if (used + offset < skb->len) + continue; } while (len > 0); - /* - * According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. -ANK - * But... af_llc still doesn't have separate sets of methods for - * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will - * eventually fix this tho :-) -acme - */ - if (sk->sk_type == SOCK_DGRAM) - goto copy_uaddr; out: release_sock(sk); return copied; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index bd242a49514a..d12413cff5bd 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -33,10 +33,9 @@ unsigned char llc_station_mac_sa[ETH_ALEN]; */ static struct llc_sap *llc_sap_alloc(void) { - struct llc_sap *sap = kmalloc(sizeof(*sap), GFP_ATOMIC); + struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); if (sap) { - memset(sap, 0, sizeof(*sap)); sap->state = LLC_SAP_STATE_ACTIVE; memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 5ae47be7dde0..a89917130a7b 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -11,7 +11,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index bd531cb235a7..19308fece3ad 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -12,7 +12,6 @@ * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/proc_fs.h> diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 20c4eb5c1ac6..61cb8cf7d153 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -51,10 +51,10 @@ void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { struct sockaddr_llc *addr; - if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ - return; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); + + memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; @@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap *sap, if (llc->laddr.lsap != laddr->lsap) continue; + if (llc->dev != skb->dev) + continue; + skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) break; diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index f37dbf8ef126..8275bd33bd9d 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -11,7 +11,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> #include <net/llc.h> diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index d1eaddb13633..45d7dd92a088 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -4,7 +4,6 @@ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index b1622b7de1cf..0a28d2c5c44f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK <file:Documentation/modules.txt>. The module will be called ipt_CONNMARK.o. If unsure, say `N'. +config NETFILTER_XT_TARGET_DSCP + tristate '"DSCP" target support' + depends on NETFILTER_XTABLES + depends on IP_NF_MANGLE || IP6_NF_MANGLE + help + This option adds a `DSCP' target, which allows you to manipulate + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_XTABLES @@ -263,6 +275,17 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read <file:Documentation/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_DSCP + tristate '"DSCP" match support' + depends on NETFILTER_XTABLES + help + This option adds a `DSCP' match, which allows you to match against + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_ESP tristate '"ESP" match support' depends on NETFILTER_XTABLES @@ -386,8 +409,8 @@ config NETFILTER_XT_MATCH_REALM <file:Documentation/modules.txt>. If unsure, say `N'. config NETFILTER_XT_MATCH_SCTP - tristate '"sctp" protocol match support' - depends on NETFILTER_XTABLES + tristate '"sctp" protocol match support (EXPERIMENTAL)' + depends on NETFILTER_XTABLES && EXPERIMENTAL help With this option enabled, you will be able to use the `sctp' match in order to match on SCTP source/destination ports @@ -411,7 +434,10 @@ config NETFILTER_XT_MATCH_STATISTIC tristate '"statistic" match support' depends on NETFILTER_XTABLES help - statistic module + This option adds a `statistic' match, which allows you to match + on packets periodically or randomly with a given percentage. + + To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_MATCH_STRING tristate '"string" match support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6fa4b7580458..a74be492fd0a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o @@ -37,6 +38,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8455a32ea5c4..d80b935b3a92 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -10,7 +10,6 @@ * 15-Mar-2000: Added NF_REPEAT --RR. * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netfilter.h> #include <net/protocol.h> @@ -183,7 +182,7 @@ next_hook: ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -223,6 +222,28 @@ copy_skb: } EXPORT_SYMBOL(skb_make_writable); +u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) +{ + u_int32_t diff[] = { oldval, newval }; + + return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); +} +EXPORT_SYMBOL(nf_csum_update); + +u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + csum = nf_csum_update(oldval, newval, csum); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = nf_csum_update(oldval, newval, skb->csum); + } else if (pseudohdr) + csum = ~nf_csum_update(oldval, newval, ~csum); + + return csum; +} +EXPORT_SYMBOL(nf_proto_csum_update); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cd299f4b7db1..093b3ddc513c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -29,7 +29,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_core.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -58,7 +57,6 @@ #include <net/netfilter/nf_conntrack_protocol.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -75,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0); void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; LIST_HEAD(nf_conntrack_expect_list); -struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; -struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; +struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly; static LIST_HEAD(helpers); -unsigned int nf_conntrack_htable_size = 0; -int nf_conntrack_max; -struct list_head *nf_conntrack_hash; -static kmem_cache_t *nf_conntrack_expect_cachep; +unsigned int nf_conntrack_htable_size __read_mostly = 0; +int nf_conntrack_max __read_mostly; +struct list_head *nf_conntrack_hash __read_mostly; +static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; struct nf_conn nf_conntrack_untracked; -unsigned int nf_ct_log_invalid; +unsigned int nf_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc; +static int nf_conntrack_vmalloc __read_mostly; static unsigned int nf_conntrack_next_id; static unsigned int nf_conntrack_expect_next_id; @@ -540,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct) static void clean_from_lists(struct nf_conn *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&nf_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -618,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } -static inline int -conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) -{ - ASSERT_READ_LOCK(&nf_conntrack_lock); - return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack - && nf_ct_tuple_equal(tuple, &i->tuple); -} - struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -637,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, ASSERT_READ_LOCK(&nf_conntrack_lock); list_for_each_entry(h, &nf_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); return h; } @@ -668,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_prepend(&nf_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&nf_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &nf_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -691,7 +675,9 @@ int __nf_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + struct nf_conn_help *help; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -721,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&nf_conntrack_hash[hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&nf_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - struct nf_conn_help *help; - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &nf_conntrack_hash[hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __nf_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); - help = nfct_help(ct); - if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __nf_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + write_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); + if (help && help->helper) + nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + nf_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; +out: NF_CT_STAT_INC(insert_failed); write_unlock_bh(&nf_conntrack_lock); return NF_DROP; @@ -778,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct nf_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, - &nf_ct_tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL; + struct nf_conn *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); - if (h) { - ct = nf_ct_tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = nf_ct_tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&nf_conntrack_lock); @@ -811,18 +794,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct nf_conntrack_helper *i, - const struct nf_conntrack_tuple *rtuple) -{ - return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct nf_conntrack_helper *, - tuple); + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct nf_conntrack_helper * @@ -867,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, nf_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&nf_conntrack_count); + if (nf_conntrack_max - && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { + && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&nf_conntrack_hash[hash])) { + atomic_dec(&nf_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -922,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + read_unlock_bh(&nf_ct_cache_lock); - atomic_inc(&nf_conntrack_count); + return conntrack; out: read_unlock_bh(&nf_ct_cache_lock); + atomic_dec(&nf_conntrack_count); return conntrack; } @@ -1324,7 +1311,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return ret; } write_lock_bh(&nf_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&nf_conntrack_lock); return 0; @@ -1343,8 +1330,8 @@ __nf_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static inline void unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); @@ -1353,17 +1340,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, nf_conntrack_event(IPCT_HELPER, ct); help->helper = NULL; } - return 0; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { unsigned int i; + struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { @@ -1375,10 +1362,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); - for (i = 0; i < nf_conntrack_htable_size; i++) - LIST_FIND_W(&nf_conntrack_hash[i], unhelp, - struct nf_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < nf_conntrack_htable_size; i++) { + list_for_each_entry(h, &nf_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&nf_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1511,37 +1500,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i, } /* Bring out ya dead! */ -static struct nf_conntrack_tuple_hash * +static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { - struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + list_for_each_entry(h, &unconfirmed, list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + return NULL; +found: + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&nf_conntrack_lock); - - return h; + return ct; } void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) { - struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 11d3be243536..0c17a5bd112b 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -15,13 +15,13 @@ * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/ctype.h> +#include <linux/inet.h> #include <net/checksum.h> #include <net/tcp.h> @@ -112,101 +112,14 @@ static struct ftp_search { }, }; -/* This code is based on inet_pton() in glibc-2.2.4 */ static int get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) { - static const char xdigits[] = "0123456789abcdef"; - u_int8_t tmp[16], *tp, *endp, *colonp; - int ch, saw_xdigit; - u_int32_t val; - size_t clen = 0; - - tp = memset(tmp, '\0', sizeof(tmp)); - endp = tp + sizeof(tmp); - colonp = NULL; - - /* Leading :: requires some special handling. */ - if (*src == ':'){ - if (*++src != ':') { - DEBUGP("invalid \":\" at the head of addr\n"); - return 0; - } - clen++; - } - - saw_xdigit = 0; - val = 0; - while ((clen < dlen) && (*src != term)) { - const char *pch; - - ch = tolower(*src++); - clen++; - - pch = strchr(xdigits, ch); - if (pch != NULL) { - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) - return 0; - - saw_xdigit = 1; - continue; - } - if (ch != ':') { - DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch); - return 0; - } - - if (!saw_xdigit) { - if (colonp) { - DEBUGP("invalid location of \"::\".\n"); - return 0; - } - colonp = tp; - continue; - } else if (*src == term) { - DEBUGP("trancated IPv6 addr\n"); - return 0; - } - - if (tp + 2 > endp) - return 0; - *tp++ = (u_int8_t) (val >> 8) & 0xff; - *tp++ = (u_int8_t) val & 0xff; - - saw_xdigit = 0; - val = 0; - continue; - } - if (saw_xdigit) { - if (tp + 2 > endp) - return 0; - *tp++ = (u_int8_t) (val >> 8) & 0xff; - *tp++ = (u_int8_t) val & 0xff; - } - if (colonp != NULL) { - /* - * Since some memmove()'s erroneously fail to handle - * overlapping regions, we'll do the shift by hand. - */ - const int n = tp - colonp; - int i; - - if (tp == endp) - return 0; - - for (i = 1; i <= n; i++) { - endp[- i] = colonp[n - i]; - colonp[n - i] = 0; - } - tp = endp; - } - if (tp != endp || (*src != term)) - return 0; - - memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); - return clen; + const char *end; + int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end); + if (ret > 0) + return (int)(end - src); + return 0; } static int try_number(const char *data, size_t dlen, u_int32_t array[], diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 3fc58e454d4e..21e0bc91cf23 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -15,7 +15,6 @@ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b8c7c567c9df..1721f7c78c77 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/netlink.h> #include <linux/spinlock.h> +#include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/netfilter.h> @@ -338,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -394,6 +391,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; + if (events & IPCT_MARK + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; @@ -428,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct nf_conn *)cb->args[1]; list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { h = (struct nf_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) @@ -441,13 +442,10 @@ restart: * then dump everything. */ if (l3proto && L3PROTO(ct) != l3proto) continue; - if (last != NULL) { - if (ct == last) { - nf_ct_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -457,65 +455,26 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } +#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) + memset(&ct->counters, 0, sizeof(ct->counters)); +#endif } - if (last != NULL) { - nf_ct_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } -#ifdef CONFIG_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct nf_conn *ct = NULL; - struct nf_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&nf_conntrack_lock); - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { - h = (struct nf_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && L3PROTO(ct) != l3proto) - continue; - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&nf_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} -#endif - static inline int ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) { @@ -790,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { u32 rlen; - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else +#ifndef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) return -ENOTSUPP; #endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; - } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) @@ -1276,6 +1227,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; + if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + return NOTIFY_DONE; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 46bc27e2756d..26408bb0955b 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -17,7 +17,7 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack_protocol.h> -unsigned int nf_ct_generic_timeout = 600*HZ; +unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 0c6da496cfa9..af568777372b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -28,6 +28,8 @@ #include <linux/sctp.h> #include <linux/string.h> #include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_protocol.h> @@ -62,13 +64,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int nf_ct_sctp_timeout_closed = 10 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int nf_ct_sctp_timeout_established = 5 DAYS; -static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; +static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; +static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; static unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ @@ -259,7 +261,7 @@ static int do_basic_checks(struct nf_conn *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 12fb7c0a1509..238bbb5b72ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -24,7 +24,6 @@ * version 2.2 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> @@ -58,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int nf_ct_tcp_be_liberal = 0; +int nf_ct_tcp_be_liberal __read_mostly = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose = 3; +int nf_ct_tcp_loose __read_mostly = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int nf_ct_tcp_max_retrans = 3; +int nf_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -93,19 +92,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int nf_ct_tcp_timeout_established = 5 DAYS; -unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int nf_ct_tcp_timeout_close = 10 SECS; +unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; +unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; static unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ @@ -689,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } @@ -824,8 +825,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae07ebe3ab37..d28981cf9af5 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -27,8 +27,8 @@ #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack_protocol.h> -unsigned int nf_ct_udp_timeout = 30*HZ; -unsigned int nf_ct_udp_timeout_stream = 180*HZ; +unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; +unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e34c574f0351..5954f6773810 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -17,7 +17,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -38,7 +37,6 @@ #include <net/netfilter/nf_conntrack_protocol.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -429,6 +427,8 @@ static struct file_operations ct_cpu_seq_fops = { /* Sysctl support */ +int nf_conntrack_checksum __read_mostly = 1; + #ifdef CONFIG_SYSCTL /* From nf_conntrack_core.c */ @@ -460,8 +460,6 @@ extern unsigned int nf_ct_generic_timeout; static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -int nf_conntrack_checksum = 1; - static struct ctl_table_header *nf_ct_sysctl_header; static ctl_table nf_ct_sysctl_table[] = { diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 6bdee2910617..a981971ce1d5 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -1,7 +1,6 @@ #ifndef _NF_INTERNALS_H #define _NF_INTERNALS_H -#include <linux/config.h> #include <linux/list.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -24,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head, int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff **skb, +extern int nf_queue(struct sk_buff *skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3e76bd0824a2..8901b3a07f7e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ee8f70889f47..4d8936ed581d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> @@ -75,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); * Any packet that leaves via this function must come back * through nf_reinject(). */ -int nf_queue(struct sk_buff **skb, - struct list_head *elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -95,14 +94,14 @@ int nf_queue(struct sk_buff **skb, read_lock(&queue_handler_lock); if (!queue_handler[pf]) { read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } afinfo = nf_get_afinfo(pf); if (!afinfo) { read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -110,9 +109,9 @@ int nf_queue(struct sk_buff **skb, if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - *skb); + skb); read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -131,15 +130,15 @@ int nf_queue(struct sk_buff **skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if ((*skb)->nf_bridge) { - physindev = (*skb)->nf_bridge->physindev; + if (skb->nf_bridge) { + physindev = skb->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = (*skb)->nf_bridge->physoutdev; + physoutdev = skb->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif - afinfo->saveroute(*skb, info); - status = queue_handler[pf]->outfn(*skb, info, queuenum, + afinfo->saveroute(skb, info); + status = queue_handler[pf]->outfn(skb, info, queuenum, queue_handler[pf]->data); read_unlock(&queue_handler_lock); @@ -154,7 +153,7 @@ int nf_queue(struct sk_buff **skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -162,6 +161,46 @@ int nf_queue(struct sk_buff **skb, return 1; } +int nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) +{ + struct sk_buff *segs; + + if (!skb_is_gso(skb)) + return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + queuenum); + + switch (pf) { + case AF_INET: + skb->protocol = htons(ETH_P_IP); + break; + case AF_INET6: + skb->protocol = htons(ETH_P_IPV6); + break; + } + + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return 1; + + do { + struct sk_buff *nskb = segs->next; + + segs->next = NULL; + if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, + queuenum)) + kfree_skb(segs); + segs = nskb; + } while (segs); + return 1; +} + void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict) { @@ -220,21 +259,20 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: + case NF_STOP: info->okfn(skb); + case NF_STOLEN: break; - case NF_QUEUE: - if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn, - verdict >> NF_VERDICT_BITS)) + if (!__nf_queue(skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; + default: + kfree_skb(skb); } rcu_read_unlock(); - - if (verdict == NF_DROP) - kfree_skb(skb); - kfree(info); return; } diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 0a63d7dac7be..c2e44e90e437 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b88e82a1a987..52fdfa2686c9 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -14,7 +14,6 @@ * of the GNU General Public License, incorporated herein by reference. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/socket.h> @@ -229,7 +228,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, NFNL_SUBSYS_ID(nlh->nlmsg_type), NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { + if (security_netlink_recv(skb, CAP_NET_ADMIN)) { DEBUGP("missing CAP_NET_ADMIN\n"); *errp = -EPERM; return -1; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 61cdda4e5d3b..b59d3b2bde21 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst) if (timer_pending(&inst->timer)) del_timer(&inst->timer); + if (!inst->skb) + return 0; + if (inst->qlen > 1) inst->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 86a4ac33de34..8eb2473d83e1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entskb, - outdev == NULL))) { + if ((entskb->ip_summed == CHECKSUM_PARTIAL || + entskb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entskb))) { spin_unlock_bh(&queue->lock); return NULL; } @@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, queue->queue_dropped++; status = -ENOSPC; if (net_ratelimit()) - printk(KERN_WARNING "ip_queue: full at %d entries, " + printk(KERN_WARNING "nf_queue: full at %d entries, " "dropping packets(s). Dropped: %d\n", queue->queue_total, queue->queue_dropped); goto err_out_free_nskb; @@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) diff, GFP_ATOMIC); if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " + printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } @@ -680,11 +680,19 @@ dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) if (entinf->indev) if (entinf->indev->ifindex == ifindex) return 1; - if (entinf->outdev) if (entinf->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 99293c63ff73..58522fc65d33 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/socket.h> #include <linux/net.h> @@ -82,12 +81,42 @@ xt_unregister_target(struct xt_target *target) int af = target->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].target, target); + list_del(&target->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_target); int +xt_register_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_target(&target[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_targets(target, i); + return err; +} +EXPORT_SYMBOL(xt_register_targets); + +void +xt_unregister_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_target(&target[i]); +} +EXPORT_SYMBOL(xt_unregister_targets); + +int xt_register_match(struct xt_match *match) { int ret, af = match->family; @@ -109,11 +138,41 @@ xt_unregister_match(struct xt_match *match) int af = match->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].match, match); + list_del(&match->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_match); +int +xt_register_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_match(&match[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_matches(match, i); + return err; +} +EXPORT_SYMBOL(xt_register_matches); + +void +xt_unregister_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_match(&match[i]); +} +EXPORT_SYMBOL(xt_unregister_matches); + /* * These are weird, but module loading must not be done with mutex @@ -274,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_match(void *match, void **dstptr, int *size, int convert) +int xt_compat_match_offset(struct xt_match *match) { - struct xt_match *m; - struct compat_xt_entry_match *pcompat_m; - struct xt_entry_match *pm; - u_int16_t msize; - int off, ret; + u_int16_t csize = match->compatsize ? : match->matchsize; + return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); +} +EXPORT_SYMBOL_GPL(xt_compat_match_offset); - ret = 0; - m = ((struct xt_entry_match *)match)->u.kernel.match; - off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize); - switch (convert) { - case COMPAT_TO_USER: - pm = (struct xt_entry_match *)match; - msize = pm->u.user.match_size; - if (copy_to_user(*dstptr, pm, msize)) { - ret = -EFAULT; - break; - } - msize -= off; - if (put_user(msize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += msize; - break; - case COMPAT_FROM_USER: - pcompat_m = (struct compat_xt_entry_match *)match; - pm = (struct xt_entry_match *)*dstptr; - msize = pcompat_m->u.user.match_size; - memcpy(pm, pcompat_m, msize); - msize += off; - pm->u.user.match_size = msize; - *size += off; - *dstptr += msize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; + int pad, off = xt_compat_match_offset(match); + u_int16_t msize = cm->u.user.match_size; + + m = *dstptr; + memcpy(m, cm, sizeof(*cm)); + if (match->compat_from_user) + match->compat_from_user(m->data, cm->data); + else + memcpy(m->data, cm->data, msize - sizeof(*cm)); + pad = XT_ALIGN(match->matchsize) - match->matchsize; + if (pad > 0) + memset(m->data + match->matchsize, 0, pad); + + msize += off; + m->u.user.match_size = msize; + + *size += off; + *dstptr += msize; +} +EXPORT_SYMBOL_GPL(xt_compat_match_from_user); + +int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match __user *cm = *dstptr; + int off = xt_compat_match_offset(match); + u_int16_t msize = m->u.user.match_size - off; + + if (copy_to_user(cm, m, sizeof(*cm)) || + put_user(msize, &cm->u.user.match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user((void __user *)cm->data, m->data)) + return -EFAULT; + } else { + if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) + return -EFAULT; } - return ret; + + *size -= off; + *dstptr += msize; + return 0; } -EXPORT_SYMBOL_GPL(xt_compat_match); -#endif +EXPORT_SYMBOL_GPL(xt_compat_match_to_user); +#endif /* CONFIG_COMPAT */ int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, @@ -351,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_target); #ifdef CONFIG_COMPAT -int xt_compat_target(void *target, void **dstptr, int *size, int convert) +int xt_compat_target_offset(struct xt_target *target) { - struct xt_target *t; - struct compat_xt_entry_target *pcompat; - struct xt_entry_target *pt; - u_int16_t tsize; - int off, ret; + u_int16_t csize = target->compatsize ? : target->targetsize; + return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); +} +EXPORT_SYMBOL_GPL(xt_compat_target_offset); - ret = 0; - t = ((struct xt_entry_target *)target)->u.kernel.target; - off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize); - switch (convert) { - case COMPAT_TO_USER: - pt = (struct xt_entry_target *)target; - tsize = pt->u.user.target_size; - if (copy_to_user(*dstptr, pt, tsize)) { - ret = -EFAULT; - break; - } - tsize -= off; - if (put_user(tsize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += tsize; - break; - case COMPAT_FROM_USER: - pcompat = (struct compat_xt_entry_target *)target; - pt = (struct xt_entry_target *)*dstptr; - tsize = pcompat->u.user.target_size; - memcpy(pt, pcompat, tsize); - tsize += off; - pt->u.user.target_size = tsize; - *size += off; - *dstptr += tsize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; +void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; + int pad, off = xt_compat_target_offset(target); + u_int16_t tsize = ct->u.user.target_size; + + t = *dstptr; + memcpy(t, ct, sizeof(*ct)); + if (target->compat_from_user) + target->compat_from_user(t->data, ct->data); + else + memcpy(t->data, ct->data, tsize - sizeof(*ct)); + pad = XT_ALIGN(target->targetsize) - target->targetsize; + if (pad > 0) + memset(t->data + target->targetsize, 0, pad); + + tsize += off; + t->u.user.target_size = tsize; + + *size += off; + *dstptr += tsize; +} +EXPORT_SYMBOL_GPL(xt_compat_target_from_user); + +int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target __user *ct = *dstptr; + int off = xt_compat_target_offset(target); + u_int16_t tsize = t->u.user.target_size - off; + + if (copy_to_user(ct, t, sizeof(*ct)) || + put_user(tsize, &ct->u.user.target_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user((void __user *)ct->data, t->data)) + return -EFAULT; + } else { + if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) + return -EFAULT; } - return ret; + + *size -= off; + *dstptr += tsize; + return 0; } -EXPORT_SYMBOL_GPL(xt_compat_target); +EXPORT_SYMBOL_GPL(xt_compat_target_to_user); #endif struct xt_table_info *xt_alloc_table_info(unsigned int size) @@ -516,15 +601,18 @@ int xt_register_table(struct xt_table *table, { int ret; struct xt_table_info *private; + struct xt_table *t; ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) return ret; /* Don't autoload: we'd eat our tail... */ - if (list_named_find(&xt[table->af].tables, table->name)) { - ret = -EEXIST; - goto unlock; + list_for_each_entry(t, &xt[table->af].tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + goto unlock; + } } /* Simplifies replace_table code. */ @@ -539,7 +627,7 @@ int xt_register_table(struct xt_table *table, /* save number of initial entries */ private->initial_entries = private->number; - list_prepend(&xt[table->af].tables, table); + list_add(&table->list, &xt[table->af].tables); ret = 0; unlock: @@ -554,7 +642,7 @@ void *xt_unregister_table(struct xt_table *table) mutex_lock(&xt[table->af].mutex); private = table->private; - LIST_DELETE(&xt[table->af].tables, table); + list_del(&table->list); mutex_unlock(&xt[table->af].mutex); return private; diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index e54e57730012..50de965bb104 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -29,8 +29,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_classify_target_info *clinfo = targinfo; @@ -40,47 +39,41 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target classify_reg = { - .name = "CLASSIFY", - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .family = AF_INET, - .me = THIS_MODULE, +static struct xt_target xt_classify_target[] = { + { + .family = AF_INET, + .name = "CLASSIFY", + .target = target, + .targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "CLASSIFY", + .family = AF_INET6, + .target = target, + .targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .me = THIS_MODULE, + }, }; -static struct xt_target classify6_reg = { - .name = "CLASSIFY", - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .family = AF_INET6, - .me = THIS_MODULE, -}; - static int __init xt_classify_init(void) { - int ret; - - ret = xt_register_target(&classify_reg); - if (ret) - return ret; - - ret = xt_register_target(&classify6_reg); - if (ret) - xt_unregister_target(&classify_reg); - - return ret; + return xt_register_targets(xt_classify_target, + ARRAY_SIZE(xt_classify_target)); } static void __exit xt_classify_fini(void) { - xt_unregister_target(&classify_reg); - xt_unregister_target(&classify6_reg); + xt_unregister_targets(xt_classify_target, + ARRAY_SIZE(xt_classify_target)); } module_init(xt_classify_init); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 60c375d36f01..c01524f817f0 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -38,8 +38,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_connmark_target_info *markinfo = targinfo; u_int32_t diff; @@ -49,24 +48,37 @@ target(struct sk_buff **pskb, u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); if (ctmark) { - switch(markinfo->mode) { - case XT_CONNMARK_SET: - newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; - if (newmark != *ctmark) - *ctmark = newmark; - break; - case XT_CONNMARK_SAVE: - newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); - if (*ctmark != newmark) - *ctmark = newmark; - break; - case XT_CONNMARK_RESTORE: - nfmark = (*pskb)->nfmark; - diff = (*ctmark ^ nfmark) & markinfo->mask; - if (diff != 0) - (*pskb)->nfmark = nfmark ^ diff; - break; - } + switch(markinfo->mode) { + case XT_CONNMARK_SET: + newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; + if (newmark != *ctmark) { + *ctmark = newmark; +#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } + break; + case XT_CONNMARK_SAVE: + newmark = (*ctmark & ~markinfo->mask) | + ((*pskb)->nfmark & markinfo->mask); + if (*ctmark != newmark) { + *ctmark = newmark; +#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } + break; + case XT_CONNMARK_RESTORE: + nfmark = (*pskb)->nfmark; + diff = (*ctmark ^ nfmark) & markinfo->mask; + if (diff != 0) + (*pskb)->nfmark = nfmark ^ diff; + break; + } } return XT_CONTINUE; @@ -77,65 +89,91 @@ checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_connmark_target_info *matchinfo = targinfo; if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "CONNMARK: restore can only be " + "called from \"mangle\" table, not \"%s\"\n", + tablename); + return 0; + } } - if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return 0; } - return 1; } -static struct xt_target connmark_reg = { - .name = "CONNMARK", - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE +#ifdef CONFIG_COMPAT +struct compat_xt_connmark_target_info { + compat_ulong_t mark, mask; + u_int8_t mode; + u_int8_t __pad1; + u_int16_t __pad2; }; -static struct xt_target connmark6_reg = { - .name = "CONNMARK", - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_connmark_target_info *cm = src; + struct xt_connmark_target_info m = { + .mark = cm->mark, + .mask = cm->mask, + .mode = cm->mode, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_connmark_target_info *m = src; + struct compat_xt_connmark_target_info cm = { + .mark = m->mark, + .mask = m->mask, + .mode = m->mode, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + +static struct xt_target xt_connmark_target[] = { + { + .name = "CONNMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_connmark_target_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif + .me = THIS_MODULE + }, + { + .name = "CONNMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), + .me = THIS_MODULE + }, }; static int __init xt_connmark_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_target(&connmark_reg); - if (ret) - return ret; - - ret = xt_register_target(&connmark6_reg); - if (ret) - xt_unregister_target(&connmark_reg); - - return ret; + return xt_register_targets(xt_connmark_target, + ARRAY_SIZE(xt_connmark_target)); } static void __exit xt_connmark_fini(void) { - xt_unregister_target(&connmark_reg); - xt_unregister_target(&connmark6_reg); + xt_unregister_targets(xt_connmark_target, + ARRAY_SIZE(xt_connmark_target)); } module_init(xt_connmark_init); diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 8c011e020769..467386266674 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb) static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; @@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_connsecmark_target_info *info = targinfo; @@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target ipt_connsecmark_reg = { - .name = "CONNSECMARK", - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, -}; - -static struct xt_target ip6t_connsecmark_reg = { - .name = "CONNSECMARK", - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static struct xt_target xt_connsecmark_target[] = { + { + .name = "CONNSECMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "CONNSECMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_connsecmark_init(void) { - int err; - need_conntrack(); - - err = xt_register_target(&ipt_connsecmark_reg); - if (err) - return err; - - err = xt_register_target(&ip6t_connsecmark_reg); - if (err) - xt_unregister_target(&ipt_connsecmark_reg); - - return err; + return xt_register_targets(xt_connsecmark_target, + ARRAY_SIZE(xt_connsecmark_target)); } static void __exit xt_connsecmark_fini(void) { - xt_unregister_target(&ip6t_connsecmark_reg); - xt_unregister_target(&ipt_connsecmark_reg); + xt_unregister_targets(xt_connsecmark_target, + ARRAY_SIZE(xt_connsecmark_target)); } module_init(xt_connsecmark_init); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c new file mode 100644 index 000000000000..a7cc75aeb38d --- /dev/null +++ b/net/netfilter/xt_DSCP.c @@ -0,0 +1,118 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp +*/ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/dsfield.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_DSCP.h> + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("x_tables DSCP modification module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_DSCP"); +MODULE_ALIAS("ip6t_DSCP"); + +static unsigned int target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct iphdr))) + return NF_DROP; + + ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + + } + return XT_CONTINUE; +} + +static unsigned int target6(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + return NF_DROP; + + ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + } + return XT_CONTINUE; +} + +static int checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; + + if ((dscp > XT_DSCP_MAX)) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + return 0; + } + return 1; +} + +static struct xt_target xt_dscp_target[] = { + { + .name = "DSCP", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "DSCP", + .family = AF_INET6, + .checkentry = checkentry, + .target = target6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .me = THIS_MODULE, + }, +}; + +static int __init xt_dscp_target_init(void) +{ + return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); +} + +static void __exit xt_dscp_target_fini(void) +{ + xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); +} + +module_init(xt_dscp_target_init); +module_exit(xt_dscp_target_fini); diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index ee9c34edc76c..c6e860a7114f 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info *markinfo = targinfo; @@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info_v1 *markinfo = targinfo; int mark = 0; @@ -76,7 +74,6 @@ checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info *markinfo = targinfo; @@ -93,7 +90,6 @@ checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info_v1 *markinfo = targinfo; @@ -112,65 +108,81 @@ checkentry_v1(const char *tablename, return 1; } -static struct xt_target ipt_mark_reg_v0 = { - .name = "MARK", - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .checkentry = checkentry_v0, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, +#ifdef CONFIG_COMPAT +struct compat_xt_mark_target_info_v1 { + compat_ulong_t mark; + u_int8_t mode; + u_int8_t __pad1; + u_int16_t __pad2; }; -static struct xt_target ipt_mark_reg_v1 = { - .name = "MARK", - .target = target_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), - .table = "mangle", - .checkentry = checkentry_v1, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 1, -}; +static void compat_from_user_v1(void *dst, void *src) +{ + struct compat_xt_mark_target_info_v1 *cm = src; + struct xt_mark_target_info_v1 m = { + .mark = cm->mark, + .mode = cm->mode, + }; + memcpy(dst, &m, sizeof(m)); +} -static struct xt_target ip6t_mark_reg_v0 = { - .name = "MARK", - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .checkentry = checkentry_v0, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static int compat_to_user_v1(void __user *dst, void *src) +{ + struct xt_mark_target_info_v1 *m = src; + struct compat_xt_mark_target_info_v1 cm = { + .mark = m->mark, + .mode = m->mode, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + +static struct xt_target xt_mark_target[] = { + { + .name = "MARK", + .family = AF_INET, + .revision = 0, + .checkentry = checkentry_v0, + .target = target_v0, + .targetsize = sizeof(struct xt_mark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "MARK", + .family = AF_INET, + .revision = 1, + .checkentry = checkentry_v1, + .target = target_v1, + .targetsize = sizeof(struct xt_mark_target_info_v1), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_mark_target_info_v1), + .compat_from_user = compat_from_user_v1, + .compat_to_user = compat_to_user_v1, +#endif + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "MARK", + .family = AF_INET6, + .revision = 0, + .checkentry = checkentry_v0, + .target = target_v0, + .targetsize = sizeof(struct xt_mark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_mark_init(void) { - int err; - - err = xt_register_target(&ipt_mark_reg_v0); - if (err) - return err; - - err = xt_register_target(&ipt_mark_reg_v1); - if (err) - xt_unregister_target(&ipt_mark_reg_v0); - - err = xt_register_target(&ip6t_mark_reg_v0); - if (err) { - xt_unregister_target(&ipt_mark_reg_v0); - xt_unregister_target(&ipt_mark_reg_v1); - } - - return err; + return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); } static void __exit xt_mark_fini(void) { - xt_unregister_target(&ipt_mark_reg_v0); - xt_unregister_target(&ipt_mark_reg_v1); - xt_unregister_target(&ip6t_mark_reg_v0); + xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); } module_init(xt_mark_init); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 86ccceb61fdd..db9b896e57c8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -29,65 +29,46 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_NFQ_info *tinfo = targinfo; return NF_QUEUE_NR(tinfo->queuenum); } -static struct xt_target ipt_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_target ip6t_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = AF_INET6, - .me = THIS_MODULE, -}; - -static struct xt_target arpt_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = NF_ARP, - .me = THIS_MODULE, +static struct xt_target xt_nfqueue_target[] = { + { + .name = "NFQUEUE", + .family = AF_INET, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, + { + .name = "NFQUEUE", + .family = AF_INET6, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, + { + .name = "NFQUEUE", + .family = NF_ARP, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, }; static int __init xt_nfqueue_init(void) { - int ret; - ret = xt_register_target(&ipt_NFQ_reg); - if (ret) - return ret; - ret = xt_register_target(&ip6t_NFQ_reg); - if (ret) - goto out_ip; - ret = xt_register_target(&arpt_NFQ_reg); - if (ret) - goto out_ip6; - - return ret; -out_ip6: - xt_unregister_target(&ip6t_NFQ_reg); -out_ip: - xt_unregister_target(&ipt_NFQ_reg); - - return ret; + return xt_register_targets(xt_nfqueue_target, + ARRAY_SIZE(xt_nfqueue_target)); } static void __exit xt_nfqueue_fini(void) { - xt_unregister_target(&arpt_NFQ_reg); - xt_unregister_target(&ip6t_NFQ_reg); - xt_unregister_target(&ipt_NFQ_reg); + xt_register_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); } module_init(xt_nfqueue_init); diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 98f4b5363ce8..6d00dcaed238 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -16,8 +16,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { /* Previously seen (loopback)? Ignore. */ if ((*pskb)->nfct != NULL) @@ -34,43 +33,32 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target notrack_reg = { - .name = "NOTRACK", - .target = target, - .targetsize = 0, - .table = "raw", - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_target notrack6_reg = { - .name = "NOTRACK", - .target = target, - .targetsize = 0, - .table = "raw", - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_target xt_notrack_target[] = { + { + .name = "NOTRACK", + .family = AF_INET, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, + { + .name = "NOTRACK", + .family = AF_INET6, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, }; static int __init xt_notrack_init(void) { - int ret; - - ret = xt_register_target(¬rack_reg); - if (ret) - return ret; - - ret = xt_register_target(¬rack6_reg); - if (ret) - xt_unregister_target(¬rack_reg); - - return ret; + return xt_register_targets(xt_notrack_target, + ARRAY_SIZE(xt_notrack_target)); } static void __exit xt_notrack_fini(void) { - xt_unregister_target(¬rack6_reg); - xt_unregister_target(¬rack_reg); + xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); } module_init(xt_notrack_init); diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c2ce9c4011cc..add752196290 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -31,7 +31,7 @@ static u8 mode; static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { u32 secmark = 0; const struct xt_secmark_target_info *info = targinfo; @@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { @@ -83,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; @@ -109,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target ipt_secmark_reg = { - .name = "SECMARK", - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, -}; - -static struct xt_target ip6t_secmark_reg = { - .name = "SECMARK", - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static struct xt_target xt_secmark_target[] = { + { + .name = "SECMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "SECMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_secmark_init(void) { - int err; - - err = xt_register_target(&ipt_secmark_reg); - if (err) - return err; - - err = xt_register_target(&ip6t_secmark_reg); - if (err) - xt_unregister_target(&ipt_secmark_reg); - - return err; + return xt_register_targets(xt_secmark_target, + ARRAY_SIZE(xt_secmark_target)); } static void __exit xt_secmark_fini(void) { - xt_unregister_target(&ip6t_secmark_reg); - xt_unregister_target(&ipt_secmark_reg); + xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); } module_init(xt_secmark_init); diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 197609cb06d7..7db492d65220 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -29,41 +29,32 @@ match(const struct sk_buff *skb, return 1; } -static struct xt_match comment_match = { - .name = "comment", - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_match comment6_match = { - .name = "comment", - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_comment_match[] = { + { + .name = "comment", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE + }, + { + .name = "comment", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE + }, }; static int __init xt_comment_init(void) { - int ret; - - ret = xt_register_match(&comment_match); - if (ret) - return ret; - - ret = xt_register_match(&comment6_match); - if (ret) - xt_unregister_match(&comment_match); - - return ret; + return xt_register_matches(xt_comment_match, + ARRAY_SIZE(xt_comment_match)); } static void __exit xt_comment_fini(void) { - xt_unregister_match(&comment_match); - xt_unregister_match(&comment6_match); + xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); } module_init(xt_comment_init); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 1396fe2d07c1..dcc497ea8183 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -125,7 +125,6 @@ static int check(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; @@ -143,40 +142,35 @@ static int check(const char *tablename, return 1; } -static struct xt_match connbytes_match = { - .name = "connbytes", - .match = match, - .checkentry = check, - .matchsize = sizeof(struct xt_connbytes_info), - .family = AF_INET, - .me = THIS_MODULE -}; -static struct xt_match connbytes6_match = { - .name = "connbytes", - .match = match, - .checkentry = check, - .matchsize = sizeof(struct xt_connbytes_info), - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_connbytes_match[] = { + { + .name = "connbytes", + .family = AF_INET, + .checkentry = check, + .match = match, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE + }, + { + .name = "connbytes", + .family = AF_INET6, + .checkentry = check, + .match = match, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE + }, }; static int __init xt_connbytes_init(void) { - int ret; - ret = xt_register_match(&connbytes_match); - if (ret) - return ret; - - ret = xt_register_match(&connbytes6_match); - if (ret) - xt_unregister_match(&connbytes_match); - return ret; + return xt_register_matches(xt_connbytes_match, + ARRAY_SIZE(xt_connbytes_match)); } static void __exit xt_connbytes_fini(void) { - xt_unregister_match(&connbytes_match); - xt_unregister_match(&connbytes6_match); + xt_unregister_matches(xt_connbytes_match, + ARRAY_SIZE(xt_connbytes_match)); } module_init(xt_connbytes_init); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 56324c8aff0a..92a5726ef237 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -55,7 +55,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_connmark_info *cm = matchinfo; @@ -75,53 +74,80 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -static struct xt_match connmark_match = { - .name = "connmark", - .match = match, - .matchsize = sizeof(struct xt_connmark_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE +#ifdef CONFIG_COMPAT +struct compat_xt_connmark_info { + compat_ulong_t mark, mask; + u_int8_t invert; + u_int8_t __pad1; + u_int16_t __pad2; }; -static struct xt_match connmark6_match = { - .name = "connmark", - .match = match, - .matchsize = sizeof(struct xt_connmark_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_connmark_info *cm = src; + struct xt_connmark_info m = { + .mark = cm->mark, + .mask = cm->mask, + .invert = cm->invert, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_connmark_info *m = src; + struct compat_xt_connmark_info cm = { + .mark = m->mark, + .mask = m->mask, + .invert = m->invert, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + +static struct xt_match xt_connmark_match[] = { + { + .name = "connmark", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_connmark_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_connmark_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif + .me = THIS_MODULE + }, + { + .name = "connmark", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_connmark_info), + .me = THIS_MODULE + }, }; static int __init xt_connmark_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_match(&connmark_match); - if (ret) - return ret; - - ret = xt_register_match(&connmark6_match); - if (ret) - xt_unregister_match(&connmark_match); - return ret; + return xt_register_matches(xt_connmark_match, + ARRAY_SIZE(xt_connmark_match)); } static void __exit xt_connmark_fini(void) { - xt_unregister_match(&connmark6_match); - xt_unregister_match(&connmark_match); + xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); } module_init(xt_connmark_init); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 145489a4c3f2..0ea501a2fda5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -45,7 +45,7 @@ match(const struct sk_buff *skb, ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); -#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg)) if (ct == &ip_conntrack_untracked) statebit = XT_CONNTRACK_STATE_UNTRACKED; @@ -54,63 +54,72 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if(sinfo->flags & XT_CONNTRACK_STATE) { + if (sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip) + if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_SNAT; - - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip) + if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } - - if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_PROTO) { - if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) + if (FWINV((statebit & sinfo->statemask) == 0, + XT_CONNTRACK_STATE)) return 0; } - if(sinfo->flags & XT_CONNTRACK_ORIGDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) + if (ct == NULL) { + if (sinfo->flags & ~XT_CONNTRACK_STATE) return 0; + return 1; } - if(sinfo->flags & XT_CONNTRACK_REPLSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_PROTO && + FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, + XT_CONNTRACK_PROTO)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_ORIGSRC && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip & + sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, + XT_CONNTRACK_ORIGSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_REPLDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_ORIGDST && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip & + sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, + XT_CONNTRACK_ORIGDST)) + return 0; - if(sinfo->flags & XT_CONNTRACK_STATUS) { - if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_REPLSRC && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip & + sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].src.ip, + XT_CONNTRACK_REPLSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires; + if (sinfo->flags & XT_CONNTRACK_REPLDST && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip & + sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, + XT_CONNTRACK_REPLDST)) + return 0; - if(!ct) - return 0; + if (sinfo->flags & XT_CONNTRACK_STATUS && + FWINV((ct->status & sinfo->statusmask) == 0, + XT_CONNTRACK_STATUS)) + return 0; - expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; + if (sinfo->flags & XT_CONNTRACK_EXPIRES) { + unsigned long expires = timer_pending(&ct->timeout) ? + (ct->timeout.expires - jiffies)/HZ : 0; - if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) + if (FWINV(!(expires >= sinfo->expires_min && + expires <= sinfo->expires_max), + XT_CONNTRACK_EXPIRES)) return 0; } - return 1; } @@ -141,63 +150,72 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if(sinfo->flags & XT_CONNTRACK_STATE) { + if (sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) + if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_SNAT; - - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) + if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } - - if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_PROTO) { - if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) + if (FWINV((statebit & sinfo->statemask) == 0, + XT_CONNTRACK_STATE)) return 0; } - if(sinfo->flags & XT_CONNTRACK_ORIGDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) + if (ct == NULL) { + if (sinfo->flags & ~XT_CONNTRACK_STATE) return 0; + return 1; } - if(sinfo->flags & XT_CONNTRACK_REPLSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_PROTO && + FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, + XT_CONNTRACK_PROTO)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_ORIGSRC && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & + sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, + XT_CONNTRACK_ORIGSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_REPLDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_ORIGDST && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & + sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, + XT_CONNTRACK_ORIGDST)) + return 0; - if(sinfo->flags & XT_CONNTRACK_STATUS) { - if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_REPLSRC && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & + sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].src.ip, + XT_CONNTRACK_REPLSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires; + if (sinfo->flags & XT_CONNTRACK_REPLDST && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & + sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, + XT_CONNTRACK_REPLDST)) + return 0; - if(!ct) - return 0; + if (sinfo->flags & XT_CONNTRACK_STATUS && + FWINV((ct->status & sinfo->statusmask) == 0, + XT_CONNTRACK_STATUS)) + return 0; - expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; + if(sinfo->flags & XT_CONNTRACK_EXPIRES) { + unsigned long expires = timer_pending(&ct->timeout) ? + (ct->timeout.expires - jiffies)/HZ : 0; - if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) + if (FWINV(!(expires >= sinfo->expires_min && + expires <= sinfo->expires_max), + XT_CONNTRACK_EXPIRES)) return 0; } - return 1; } @@ -208,7 +226,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -221,8 +238,7 @@ checkentry(const char *tablename, return 1; } -static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +static void destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); @@ -241,11 +257,8 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { - int ret; need_conntrack(); - ret = xt_register_match(&conntrack_match); - - return ret; + return xt_register_match(&conntrack_match); } static void __exit xt_conntrack_fini(void) diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 2e2f825dad4c..3e6cf430e518 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -131,7 +131,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_dccp_info *info = matchinfo; @@ -141,27 +140,26 @@ checkentry(const char *tablename, && !(info->invflags & ~info->flags); } -static struct xt_match dccp_match = -{ - .name = "dccp", - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, +static struct xt_match xt_dccp_match[] = { + { + .name = "dccp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .me = THIS_MODULE, + }, + { + .name = "dccp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .me = THIS_MODULE, + }, }; -static struct xt_match dccp6_match = -{ - .name = "dccp", - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, -}; - static int __init xt_dccp_init(void) { @@ -173,27 +171,19 @@ static int __init xt_dccp_init(void) dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); if (!dccp_optbuf) return -ENOMEM; - ret = xt_register_match(&dccp_match); + ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); if (ret) goto out_kfree; - ret = xt_register_match(&dccp6_match); - if (ret) - goto out_unreg; - return ret; -out_unreg: - xt_unregister_match(&dccp_match); out_kfree: kfree(dccp_optbuf); - return ret; } static void __exit xt_dccp_fini(void) { - xt_unregister_match(&dccp6_match); - xt_unregister_match(&dccp_match); + xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); kfree(dccp_optbuf); } diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c new file mode 100644 index 000000000000..26c7f4ad102a --- /dev/null +++ b/net/netfilter/xt_dscp.c @@ -0,0 +1,103 @@ +/* IP tables module for matching the value of the IPv4/IPv6 DSCP field + * + * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/dsfield.h> + +#include <linux/netfilter/xt_dscp.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("x_tables DSCP matching module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_dscp"); +MODULE_ALIAS("ip6t_dscp"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int match6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; + + if (dscp > XT_DSCP_MAX) { + printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); + return 0; + } + + return 1; +} + +static struct xt_match xt_dscp_match[] = { + { + .name = "dscp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dscp_info), + .me = THIS_MODULE, + }, + { + .name = "dscp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match6, + .matchsize = sizeof(struct xt_dscp_info), + .me = THIS_MODULE, + }, +}; + +static int __init xt_dscp_match_init(void) +{ + return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); +} + +static void __exit xt_dscp_match_fini(void) +{ + xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); +} + +module_init(xt_dscp_match_init); +module_exit(xt_dscp_match_fini); diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 9dad6281e0c1..7c95f149d942 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -79,7 +79,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct xt_esp *espinfo = matchinfo; @@ -92,44 +91,35 @@ checkentry(const char *tablename, return 1; } -static struct xt_match esp_match = { - .name = "esp", - .family = AF_INET, - .proto = IPPROTO_ESP, - .match = &match, - .matchsize = sizeof(struct xt_esp), - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match esp6_match = { - .name = "esp", - .family = AF_INET6, - .proto = IPPROTO_ESP, - .match = &match, - .matchsize = sizeof(struct xt_esp), - .checkentry = &checkentry, - .me = THIS_MODULE, +static struct xt_match xt_esp_match[] = { + { + .name = "esp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_esp), + .proto = IPPROTO_ESP, + .me = THIS_MODULE, + }, + { + .name = "esp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_esp), + .proto = IPPROTO_ESP, + .me = THIS_MODULE, + }, }; static int __init xt_esp_init(void) { - int ret; - ret = xt_register_match(&esp_match); - if (ret) - return ret; - - ret = xt_register_match(&esp6_match); - if (ret) - xt_unregister_match(&esp_match); - - return ret; + return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); } static void __exit xt_esp_cleanup(void) { - xt_unregister_match(&esp_match); - xt_unregister_match(&esp6_match); + xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); } module_init(xt_esp_init); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 799c2a43e3b9..5d7818b73e3a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -139,7 +139,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; @@ -156,52 +155,44 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -static struct xt_match helper_match = { - .name = "helper", - .match = match, - .matchsize = sizeof(struct xt_helper_info), - .checkentry = check, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE, -}; -static struct xt_match helper6_match = { - .name = "helper", - .match = match, - .matchsize = sizeof(struct xt_helper_info), - .checkentry = check, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_helper_match[] = { + { + .name = "helper", + .family = AF_INET, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, + }, + { + .name = "helper", + .family = AF_INET6, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, + }, }; static int __init xt_helper_init(void) { - int ret; need_conntrack(); - - ret = xt_register_match(&helper_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&helper6_match); - if (ret < 0) - xt_unregister_match(&helper_match); - - return ret; + return xt_register_matches(xt_helper_match, + ARRAY_SIZE(xt_helper_match)); } static void __exit xt_helper_fini(void) { - xt_unregister_match(&helper_match); - xt_unregister_match(&helper6_match); + xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); } module_init(xt_helper_init); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 109132c9a146..67fd30d9f303 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -52,39 +52,32 @@ match6(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static struct xt_match length_match = { - .name = "length", - .match = match, - .matchsize = sizeof(struct xt_length_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match length6_match = { - .name = "length", - .match = match6, - .matchsize = sizeof(struct xt_length_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_length_match[] = { + { + .name = "length", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_length_info), + .me = THIS_MODULE, + }, + { + .name = "length", + .family = AF_INET6, + .match = match6, + .matchsize = sizeof(struct xt_length_info), + .me = THIS_MODULE, + }, }; static int __init xt_length_init(void) { - int ret; - ret = xt_register_match(&length_match); - if (ret) - return ret; - ret = xt_register_match(&length6_match); - if (ret) - xt_unregister_match(&length_match); - - return ret; + return xt_register_matches(xt_length_match, + ARRAY_SIZE(xt_length_match)); } static void __exit xt_length_fini(void) { - xt_unregister_match(&length_match); - xt_unregister_match(&length6_match); + xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); } module_init(xt_length_init); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index ce7fdb7e4e07..fda7b7dec27d 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_rateinfo *r = matchinfo; @@ -123,55 +122,95 @@ ipt_limit_checkentry(const char *tablename, return 0; } - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ - r->cost = user2credits(r->avg); - /* For SMP, we only want to use one set of counters. */ r->master = r; - + if (r->cost == 0) { + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + r->prev = jiffies; + r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->cost = user2credits(r->avg); + } return 1; } -static struct xt_match ipt_limit_reg = { - .name = "limit", - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), - .checkentry = ipt_limit_checkentry, - .family = AF_INET, - .me = THIS_MODULE, +#ifdef CONFIG_COMPAT +struct compat_xt_rateinfo { + u_int32_t avg; + u_int32_t burst; + + compat_ulong_t prev; + u_int32_t credit; + u_int32_t credit_cap, cost; + + u_int32_t master; }; -static struct xt_match limit6_reg = { - .name = "limit", - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), - .checkentry = ipt_limit_checkentry, - .family = AF_INET6, - .me = THIS_MODULE, + +/* To keep the full "prev" timestamp, the upper 32 bits are stored in the + * master pointer, which does not need to be preserved. */ +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_rateinfo *cm = src; + struct xt_rateinfo m = { + .avg = cm->avg, + .burst = cm->burst, + .prev = cm->prev | (unsigned long)cm->master << 32, + .credit = cm->credit, + .credit_cap = cm->credit_cap, + .cost = cm->cost, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_rateinfo *m = src; + struct compat_xt_rateinfo cm = { + .avg = m->avg, + .burst = m->burst, + .prev = m->prev, + .credit = m->credit, + .credit_cap = m->credit_cap, + .cost = m->cost, + .master = m->prev >> 32, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + +static struct xt_match xt_limit_match[] = { + { + .name = "limit", + .family = AF_INET, + .checkentry = ipt_limit_checkentry, + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_rateinfo), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif + .me = THIS_MODULE, + }, + { + .name = "limit", + .family = AF_INET6, + .checkentry = ipt_limit_checkentry, + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), + .me = THIS_MODULE, + }, }; static int __init xt_limit_init(void) { - int ret; - - ret = xt_register_match(&ipt_limit_reg); - if (ret) - return ret; - - ret = xt_register_match(&limit6_reg); - if (ret) - xt_unregister_match(&ipt_limit_reg); - - return ret; + return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); } static void __exit xt_limit_fini(void) { - xt_unregister_match(&ipt_limit_reg); - xt_unregister_match(&limit6_reg); + xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); } module_init(xt_limit_init); diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 356290ffe386..425fc21e31f5 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -43,43 +43,37 @@ match(const struct sk_buff *skb, ^ info->invert)); } -static struct xt_match mac_match = { - .name = "mac", - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .family = AF_INET, - .me = THIS_MODULE, -}; -static struct xt_match mac6_match = { - .name = "mac", - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_mac_match[] = { + { + .name = "mac", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | + (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .me = THIS_MODULE, + }, + { + .name = "mac", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | + (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .me = THIS_MODULE, + }, }; static int __init xt_mac_init(void) { - int ret; - ret = xt_register_match(&mac_match); - if (ret) - return ret; - - ret = xt_register_match(&mac6_match); - if (ret) - xt_unregister_match(&mac_match); - - return ret; + return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); } static void __exit xt_mac_fini(void) { - xt_unregister_match(&mac_match); - xt_unregister_match(&mac6_match); + xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); } module_init(xt_mac_init); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 876bc5797738..934dddfbcd23 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -39,7 +39,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_mark_info *minfo = matchinfo; @@ -51,42 +50,69 @@ checkentry(const char *tablename, return 1; } -static struct xt_match mark_match = { - .name = "mark", - .match = match, - .matchsize = sizeof(struct xt_mark_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, +#ifdef CONFIG_COMPAT +struct compat_xt_mark_info { + compat_ulong_t mark, mask; + u_int8_t invert; + u_int8_t __pad1; + u_int16_t __pad2; }; -static struct xt_match mark6_match = { - .name = "mark", - .match = match, - .matchsize = sizeof(struct xt_mark_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, -}; +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_mark_info *cm = src; + struct xt_mark_info m = { + .mark = cm->mark, + .mask = cm->mask, + .invert = cm->invert, + }; + memcpy(dst, &m, sizeof(m)); +} -static int __init xt_mark_init(void) +static int compat_to_user(void __user *dst, void *src) { - int ret; - ret = xt_register_match(&mark_match); - if (ret) - return ret; + struct xt_mark_info *m = src; + struct compat_xt_mark_info cm = { + .mark = m->mark, + .mask = m->mask, + .invert = m->invert, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ - ret = xt_register_match(&mark6_match); - if (ret) - xt_unregister_match(&mark_match); +static struct xt_match xt_mark_match[] = { + { + .name = "mark", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_mark_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_mark_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif + .me = THIS_MODULE, + }, + { + .name = "mark", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_mark_info), + .me = THIS_MODULE, + }, +}; - return ret; +static int __init xt_mark_init(void) +{ + return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); } static void __exit xt_mark_fini(void) { - xt_unregister_match(&mark_match); - xt_unregister_match(&mark6_match); + xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); } module_init(xt_mark_init); diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 1ff0a25396e7..d3aefd380930 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -176,7 +176,6 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -191,7 +190,6 @@ checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -206,7 +204,6 @@ checkentry6(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -231,84 +227,55 @@ checkentry6_v1(const char *tablename, multiinfo->count); } -static struct xt_match multiport_match = { - .name = "multiport", - .revision = 0, - .matchsize = sizeof(struct xt_multiport), - .match = &match, - .checkentry = &checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match multiport_match_v1 = { - .name = "multiport", - .revision = 1, - .matchsize = sizeof(struct xt_multiport_v1), - .match = &match_v1, - .checkentry = &checkentry_v1, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match multiport6_match = { - .name = "multiport", - .revision = 0, - .matchsize = sizeof(struct xt_multiport), - .match = &match, - .checkentry = &checkentry6, - .family = AF_INET6, - .me = THIS_MODULE, -}; - -static struct xt_match multiport6_match_v1 = { - .name = "multiport", - .revision = 1, - .matchsize = sizeof(struct xt_multiport_v1), - .match = &match_v1, - .checkentry = &checkentry6_v1, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_multiport_match[] = { + { + .name = "multiport", + .family = AF_INET, + .revision = 0, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_multiport), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET, + .revision = 1, + .checkentry = checkentry_v1, + .match = match_v1, + .matchsize = sizeof(struct xt_multiport_v1), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET6, + .revision = 0, + .checkentry = checkentry6, + .match = match, + .matchsize = sizeof(struct xt_multiport), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET6, + .revision = 1, + .checkentry = checkentry6_v1, + .match = match_v1, + .matchsize = sizeof(struct xt_multiport_v1), + .me = THIS_MODULE, + }, }; static int __init xt_multiport_init(void) { - int ret; - - ret = xt_register_match(&multiport_match); - if (ret) - goto out; - - ret = xt_register_match(&multiport_match_v1); - if (ret) - goto out_unreg_multi_v0; - - ret = xt_register_match(&multiport6_match); - if (ret) - goto out_unreg_multi_v1; - - ret = xt_register_match(&multiport6_match_v1); - if (ret) - goto out_unreg_multi6_v0; - - return ret; - -out_unreg_multi6_v0: - xt_unregister_match(&multiport6_match); -out_unreg_multi_v1: - xt_unregister_match(&multiport_match_v1); -out_unreg_multi_v0: - xt_unregister_match(&multiport_match); -out: - return ret; + return xt_register_matches(xt_multiport_match, + ARRAY_SIZE(xt_multiport_match)); } static void __exit xt_multiport_fini(void) { - xt_unregister_match(&multiport_match); - xt_unregister_match(&multiport_match_v1); - xt_unregister_match(&multiport6_match); - xt_unregister_match(&multiport6_match_v1); + xt_unregister_matches(xt_multiport_match, + ARRAY_SIZE(xt_multiport_match)); } module_init(xt_multiport_init); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 5fe4c9df17f5..fd8f954cded5 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge.h> @@ -105,7 +106,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_physdev_info *info = matchinfo; @@ -113,46 +113,52 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return 0; + if (brnf_deferred_hooks == 0 && + info->bitmask & XT_PHYSDEV_OP_OUT && + (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || + info->invert & XT_PHYSDEV_OP_BRIDGED) && + hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING))) { + printk(KERN_WARNING "physdev match: using --physdev-out in the " + "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " + "traffic is deprecated and breaks other things, it will " + "be removed in January 2007. See Documentation/" + "feature-removal-schedule.txt for details. This doesn't " + "affect you in case you're using it for purely bridged " + "traffic.\n"); + brnf_deferred_hooks = 1; + } return 1; } -static struct xt_match physdev_match = { - .name = "physdev", - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match physdev6_match = { - .name = "physdev", - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_physdev_match[] = { + { + .name = "physdev", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, + { + .name = "physdev", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, }; static int __init xt_physdev_init(void) { - int ret; - - ret = xt_register_match(&physdev_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&physdev6_match); - if (ret < 0) - xt_unregister_match(&physdev_match); - - return ret; + return xt_register_matches(xt_physdev_match, + ARRAY_SIZE(xt_physdev_match)); } static void __exit xt_physdev_fini(void) { - xt_unregister_match(&physdev_match); - xt_unregister_match(&physdev6_match); + xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match)); } module_init(xt_physdev_init); diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 3ac703b5cb8f..16e7b0804287 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -9,6 +9,8 @@ #include <linux/skbuff.h> #include <linux/if_ether.h> #include <linux/if_packet.h> +#include <linux/in.h> +#include <linux/ip.h> #include <linux/netfilter/xt_pkttype.h> #include <linux/netfilter/x_tables.h> @@ -28,45 +30,45 @@ static int match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { + u_int8_t type; const struct xt_pkttype_info *info = matchinfo; - return (skb->pkt_type == info->pkttype) ^ info->invert; -} + if (skb->pkt_type == PACKET_LOOPBACK) + type = (MULTICAST(skb->nh.iph->daddr) + ? PACKET_MULTICAST + : PACKET_BROADCAST); + else + type = skb->pkt_type; -static struct xt_match pkttype_match = { - .name = "pkttype", - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .family = AF_INET, - .me = THIS_MODULE, -}; + return (type == info->pkttype) ^ info->invert; +} -static struct xt_match pkttype6_match = { - .name = "pkttype", - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_pkttype_match[] = { + { + .name = "pkttype", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, + }, + { + .name = "pkttype", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, + }, }; static int __init xt_pkttype_init(void) { - int ret; - ret = xt_register_match(&pkttype_match); - if (ret) - return ret; - - ret = xt_register_match(&pkttype6_match); - if (ret) - xt_unregister_match(&pkttype_match); - - return ret; + return xt_register_matches(xt_pkttype_match, + ARRAY_SIZE(xt_pkttype_match)); } static void __exit xt_pkttype_fini(void) { - xt_unregister_match(&pkttype_match); - xt_unregister_match(&pkttype6_match); + xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match)); } module_init(xt_pkttype_init); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index a3aa62fbda6f..46bde2b1e1e0 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -8,7 +8,6 @@ */ #include <linux/kernel.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/init.h> @@ -136,8 +135,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; @@ -166,43 +164,34 @@ static int checkentry(const char *tablename, const void *ip_void, return 1; } -static struct xt_match policy_match = { - .name = "policy", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match policy6_match = { - .name = "policy", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_policy_match[] = { + { + .name = "policy", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .me = THIS_MODULE, + }, + { + .name = "policy", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .me = THIS_MODULE, + }, }; static int __init init(void) { - int ret; - - ret = xt_register_match(&policy_match); - if (ret) - return ret; - ret = xt_register_match(&policy6_match); - if (ret) - xt_unregister_match(&policy_match); - return ret; + return xt_register_matches(xt_policy_match, + ARRAY_SIZE(xt_policy_match)); } static void __exit fini(void) { - xt_unregister_match(&policy6_match); - xt_unregister_match(&policy_match); + xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match)); } module_init(init); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 4cdba7469dc4..b75fa2c70e66 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,6 +11,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); +MODULE_ALIAS("ipt_quota"); +MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); @@ -39,7 +41,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; @@ -50,46 +52,33 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match quota_match = { - .name = "quota", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .checkentry = checkentry, - .me = THIS_MODULE -}; - -static struct xt_match quota_match6 = { - .name = "quota", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .checkentry = checkentry, - .me = THIS_MODULE +static struct xt_match xt_quota_match[] = { + { + .name = "quota", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE + }, + { + .name = "quota", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE + }, }; static int __init xt_quota_init(void) { - int ret; - - ret = xt_register_match("a_match); - if (ret) - goto err1; - ret = xt_register_match("a_match6); - if (ret) - goto err2; - return ret; - -err2: - xt_unregister_match("a_match); -err1: - return ret; + return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); } static void __exit xt_quota_fini(void) { - xt_unregister_match("a_match6); - xt_unregister_match("a_match); + xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); } module_init(xt_quota_init); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 9316c753692f..7956acaaa24b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -151,7 +151,7 @@ match(const struct sk_buff *skb, && SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) && (ntohs(sh->dest) <= info->dpts[1])), XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, protoff, + && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), info->chunkmap, info->chunk_match_type, info->flag_info, info->flag_count, hotdrop), @@ -163,7 +163,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_sctp_info *info = matchinfo; @@ -178,44 +177,35 @@ checkentry(const char *tablename, | SCTP_CHUNK_MATCH_ONLY))); } -static struct xt_match sctp_match = { - .name = "sctp", - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_match sctp6_match = { - .name = "sctp", - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_sctp_match[] = { + { + .name = "sctp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .me = THIS_MODULE + }, + { + .name = "sctp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .me = THIS_MODULE + }, }; static int __init xt_sctp_init(void) { - int ret; - ret = xt_register_match(&sctp_match); - if (ret) - return ret; - - ret = xt_register_match(&sctp6_match); - if (ret) - xt_unregister_match(&sctp_match); - - return ret; + return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); } static void __exit xt_sctp_fini(void) { - xt_unregister_match(&sctp6_match); - xt_unregister_match(&sctp_match); + xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); } module_init(xt_sctp_init); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index f9e304dc4504..d9010b16a1f9 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -48,7 +48,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -62,54 +61,43 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -static struct xt_match state_match = { - .name = "state", - .match = match, - .checkentry = check, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match state6_match = { - .name = "state", - .match = match, - .checkentry = check, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_state_match[] = { + { + .name = "state", + .family = AF_INET, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_state_info), + .me = THIS_MODULE, + }, + { + .name = "state", + .family = AF_INET6, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_state_info), + .me = THIS_MODULE, + }, }; static int __init xt_state_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_match(&state_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&state6_match); - if (ret < 0) - xt_unregister_match(&state_match); - - return ret; + return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } static void __exit xt_state_fini(void) { - xt_unregister_match(&state_match); - xt_unregister_match(&state6_match); + xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } module_init(xt_state_init); diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index de1037f58596..091a9f89f5d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -55,7 +55,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; @@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match statistic_match = { - .name = "statistic", - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match statistic_match6 = { - .name = "statistic", - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_statistic_match[] = { + { + .name = "statistic", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, + }, + { + .name = "statistic", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, + }, }; static int __init xt_statistic_init(void) { - int ret; - - ret = xt_register_match(&statistic_match); - if (ret) - goto err1; - - ret = xt_register_match(&statistic_match6); - if (ret) - goto err2; - return ret; -err2: - xt_unregister_match(&statistic_match); -err1: - return ret; + return xt_register_matches(xt_statistic_match, + ARRAY_SIZE(xt_statistic_match)); } static void __exit xt_statistic_fini(void) { - xt_unregister_match(&statistic_match6); - xt_unregister_match(&statistic_match); + xt_unregister_matches(xt_statistic_match, + ARRAY_SIZE(xt_statistic_match)); } module_init(xt_statistic_init); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0ebb6ac2c8c7..4453252400aa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -37,7 +37,7 @@ static int match(const struct sk_buff *skb, return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) - != UINT_MAX) && !conf->invert; + != UINT_MAX) ^ conf->invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) @@ -46,7 +46,6 @@ static int checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; @@ -55,7 +54,10 @@ static int checkentry(const char *tablename, /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return 0; - + if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') + return 0; + if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) + return 0; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) @@ -66,49 +68,40 @@ static int checkentry(const char *tablename, return 1; } -static void destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +static void destroy(const struct xt_match *match, void *matchinfo) { textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } -static struct xt_match string_match = { - .name = "string", - .match = match, - .matchsize = sizeof(struct xt_string_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE -}; -static struct xt_match string6_match = { - .name = "string", - .match = match, - .matchsize = sizeof(struct xt_string_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_string_match[] = { + { + .name = "string", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, + { + .name = "string", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, }; static int __init xt_string_init(void) { - int ret; - - ret = xt_register_match(&string_match); - if (ret) - return ret; - ret = xt_register_match(&string6_match); - if (ret) - xt_unregister_match(&string_match); - - return ret; + return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); } static void __exit xt_string_fini(void) { - xt_unregister_match(&string_match); - xt_unregister_match(&string6_match); + xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); } module_init(xt_string_init); diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index cf7d335cadcd..a3682fe2f192 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -18,21 +18,22 @@ #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#define TH_SYN 0x02 - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); -/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */ -static inline int -mssoption_match(u_int16_t min, u_int16_t max, - const struct sk_buff *skb, - unsigned int protoff, - int invert, - int *hotdrop) +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) { + const struct xt_tcpmss_match_info *info = matchinfo; struct tcphdr _tcph, *th; /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ u8 _opt[15 * 4 - sizeof(_tcph)], *op; @@ -64,72 +65,50 @@ mssoption_match(u_int16_t min, u_int16_t max, mssval = (op[i+2] << 8) | op[i+3]; - return (mssval >= min && mssval <= max) ^ invert; + return (mssval >= info->mss_min && + mssval <= info->mss_max) ^ info->invert; } - if (op[i] < 2) i++; - else i += op[i+1]?:1; + if (op[i] < 2) + i++; + else + i += op[i+1] ? : 1; } out: - return invert; + return info->invert; - dropit: +dropit: *hotdrop = 1; return 0; } -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct xt_tcpmss_match_info *info = matchinfo; - - return mssoption_match(info->mss_min, info->mss_max, skb, protoff, - info->invert, hotdrop); -} - -static struct xt_match tcpmss_match = { - .name = "tcpmss", - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match tcpmss6_match = { - .name = "tcpmss", - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_tcpmss_match[] = { + { + .name = "tcpmss", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "tcpmss", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, }; - static int __init xt_tcpmss_init(void) { - int ret; - ret = xt_register_match(&tcpmss_match); - if (ret) - return ret; - - ret = xt_register_match(&tcpmss6_match); - if (ret) - xt_unregister_match(&tcpmss_match); - - return ret; + return xt_register_matches(xt_tcpmss_match, + ARRAY_SIZE(xt_tcpmss_match)); } static void __exit xt_tcpmss_fini(void) { - xt_unregister_match(&tcpmss6_match); - xt_unregister_match(&tcpmss_match); + xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match)); } module_init(xt_tcpmss_init); diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 1b61dac9c873..e76a68e0bc66 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *tcpinfo = matchinfo; @@ -190,7 +189,6 @@ udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *udpinfo = matchinfo; @@ -199,81 +197,54 @@ udp_checkentry(const char *tablename, return !(udpinfo->invflags & ~XT_UDP_INV_MASK); } -static struct xt_match tcp_matchstruct = { - .name = "tcp", - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .family = AF_INET, - .checkentry = tcp_checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match tcp6_matchstruct = { - .name = "tcp", - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .family = AF_INET6, - .checkentry = tcp_checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match udp_matchstruct = { - .name = "udp", - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .family = AF_INET, - .checkentry = udp_checkentry, - .me = THIS_MODULE, -}; -static struct xt_match udp6_matchstruct = { - .name = "udp", - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .family = AF_INET6, - .checkentry = udp_checkentry, - .me = THIS_MODULE, +static struct xt_match xt_tcpudp_match[] = { + { + .name = "tcp", + .family = AF_INET, + .checkentry = tcp_checkentry, + .match = tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "tcp", + .family = AF_INET6, + .checkentry = tcp_checkentry, + .match = tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "udp", + .family = AF_INET, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .me = THIS_MODULE, + }, + { + .name = "udp", + .family = AF_INET6, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .me = THIS_MODULE, + }, }; static int __init xt_tcpudp_init(void) { - int ret; - ret = xt_register_match(&tcp_matchstruct); - if (ret) - return ret; - - ret = xt_register_match(&tcp6_matchstruct); - if (ret) - goto out_unreg_tcp; - - ret = xt_register_match(&udp_matchstruct); - if (ret) - goto out_unreg_tcp6; - - ret = xt_register_match(&udp6_matchstruct); - if (ret) - goto out_unreg_udp; - - return ret; - -out_unreg_udp: - xt_unregister_match(&tcp_matchstruct); -out_unreg_tcp6: - xt_unregister_match(&tcp6_matchstruct); -out_unreg_tcp: - xt_unregister_match(&tcp_matchstruct); - return ret; + return xt_register_matches(xt_tcpudp_match, + ARRAY_SIZE(xt_tcpudp_match)); } static void __exit xt_tcpudp_fini(void) { - xt_unregister_match(&udp6_matchstruct); - xt_unregister_match(&udp_matchstruct); - xt_unregister_match(&tcp6_matchstruct); - xt_unregister_match(&tcp_matchstruct); + xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match)); } module_init(xt_tcpudp_init); diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig new file mode 100644 index 000000000000..9f7121ae13e9 --- /dev/null +++ b/net/netlabel/Kconfig @@ -0,0 +1,17 @@ +# +# NetLabel configuration +# + +config NETLABEL + bool "NetLabel subsystem support" + depends on NET && SECURITY + default n + ---help--- + NetLabel provides support for explicit network packet labeling + protocols such as CIPSO and RIPSO. For more information see + Documentation/netlabel as well as the NetLabel SourceForge project + for configuration tools and additional documentation. + + * http://netlabel.sf.net + + If you are unsure, say N. diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile new file mode 100644 index 000000000000..8af18c0a47d9 --- /dev/null +++ b/net/netlabel/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the NetLabel subsystem. +# +# Feb 9, 2006, Paul Moore <paul.moore@hp.com> +# + +# base objects +obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o + +# management objects +obj-y += netlabel_mgmt.o + +# protocol modules +obj-y += netlabel_unlabeled.o +obj-y += netlabel_cipso_v4.o + diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c new file mode 100644 index 000000000000..a6ce1d6d5c59 --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.c @@ -0,0 +1,773 @@ +/* + * NetLabel CIPSO/IPv4 Support + * + * This file defines the CIPSO/IPv4 functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/audit.h> +#include <net/sock.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> + +#include "netlabel_user.h" +#include "netlabel_cipso_v4.h" + +/* Argument struct for cipso_v4_doi_walk() */ +struct netlbl_cipsov4_doiwalk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_cipsov4_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CIPSOV4_A_MAX, +}; + +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { + [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, + [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED }, +}; + +/* + * Helper Functions + */ + +/** + * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void netlbl_cipsov4_doi_free(struct rcu_head *entry) +{ + struct cipso_v4_doi *ptr; + + ptr = container_of(entry, struct cipso_v4_doi, rcu); + switch (ptr->type) { + case CIPSO_V4_MAP_STD: + kfree(ptr->map.std->lvl.cipso); + kfree(ptr->map.std->lvl.local); + kfree(ptr->map.std->cat.cipso); + kfree(ptr->map.std->cat.local); + break; + } + kfree(ptr); +} + +/** + * netlbl_cipsov4_add_common - Parse the common sections of a ADD message + * @info: the Generic NETLINK info block + * @doi_def: the CIPSO V4 DOI definition + * + * Description: + * Parse the common sections of a ADD message and fill in the related values + * in @doi_def. Returns zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_add_common(struct genl_info *info, + struct cipso_v4_doi *doi_def) +{ + struct nlattr *nla; + int nla_rem; + u32 iter = 0; + + doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; + + nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) + if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (iter > CIPSO_V4_TAG_MAXCNT) + return -EINVAL; + doi_def->tags[iter++] = nla_get_u8(nla); + } + if (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + + return 0; +} + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition + * @info: the Generic NETLINK info block + * + * Description: + * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message + * and add it to the CIPSO V4 engine. Return zero on success and non-zero on + * error. + * + */ +static int netlbl_cipsov4_add_std(struct genl_info *info) +{ + int ret_val = -EINVAL; + struct cipso_v4_doi *doi_def = NULL; + struct nlattr *nla_a; + struct nlattr *nla_b; + int nla_a_rem; + int nla_b_rem; + + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] || + !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) + return -EINVAL; + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) + return -ENOMEM; + doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); + if (doi_def->map.std == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->type = CIPSO_V4_MAP_STD; + + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_std_failure; + + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSLVLLOC: + if (nla_get_u32(nla_b) >= + doi_def->map.std->lvl.local_size) + doi_def->map.std->lvl.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSLVLREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->lvl.cipso_size) + doi_def->map.std->lvl.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS || + doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) + goto add_std_failure; + doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->lvl.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->lvl.cipso == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + struct nlattr *lvl_loc; + struct nlattr *lvl_rem; + + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + lvl_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLLOC); + lvl_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSLVLREM); + if (lvl_loc == NULL || lvl_rem == NULL) + goto add_std_failure; + doi_def->map.std->lvl.local[nla_get_u32(lvl_loc)] = + nla_get_u32(lvl_rem); + doi_def->map.std->lvl.cipso[nla_get_u32(lvl_rem)] = + nla_get_u32(lvl_loc); + } + + if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; + nla_for_each_nested(nla_b, nla_a, nla_b_rem) + switch (nla_b->nla_type) { + case NLBL_CIPSOV4_A_MLSCATLOC: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.local_size) + doi_def->map.std->cat.local_size = + nla_get_u32(nla_b) + 1; + break; + case NLBL_CIPSOV4_A_MLSCATREM: + if (nla_get_u32(nla_b) >= + doi_def->map.std->cat.cipso_size) + doi_def->map.std->cat.cipso_size = + nla_get_u32(nla_b) + 1; + break; + } + } + if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS || + doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) + goto add_std_failure; + doi_def->map.std->cat.local = kcalloc( + doi_def->map.std->cat.local_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->cat.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std->cat.cipso = kcalloc( + doi_def->map.std->cat.cipso_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->cat.cipso == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + nla_for_each_nested(nla_a, + info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + nla_a_rem) + if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + struct nlattr *cat_loc; + struct nlattr *cat_rem; + + cat_loc = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATLOC); + cat_rem = nla_find_nested(nla_a, + NLBL_CIPSOV4_A_MLSCATREM); + if (cat_loc == NULL || cat_rem == NULL) + goto add_std_failure; + doi_def->map.std->cat.local[ + nla_get_u32(cat_loc)] = + nla_get_u32(cat_rem); + doi_def->map.std->cat.cipso[ + nla_get_u32(cat_rem)] = + nla_get_u32(cat_loc); + } + } + + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_std_failure; + return 0; + +add_std_failure: + if (doi_def) + netlbl_cipsov4_doi_free(&doi_def->rcu); + return ret_val; +} + +/** + * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition + * @info: the Generic NETLINK info block + * + * Description: + * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message + * and add it to the CIPSO V4 engine. Return zero on success and non-zero on + * error. + * + */ +static int netlbl_cipsov4_add_pass(struct genl_info *info) +{ + int ret_val; + struct cipso_v4_doi *doi_def = NULL; + + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) + return -ENOMEM; + doi_def->type = CIPSO_V4_MAP_PASS; + + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_pass_failure; + + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_pass_failure; + return 0; + +add_pass_failure: + netlbl_cipsov4_doi_free(&doi_def->rcu); + return ret_val; +} + +/** + * netlbl_cipsov4_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Create a new DOI definition based on the given ADD message and add it to the + * CIPSO V4 engine. Returns zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) + +{ + int ret_val = -EINVAL; + u32 type; + u32 doi; + const char *type_str = "(unknown)"; + struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; + + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_MTYPE]) + return -EINVAL; + + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); + switch (type) { + case CIPSO_V4_MAP_STD: + type_str = "std"; + ret_val = netlbl_cipsov4_add_std(info); + break; + case CIPSO_V4_MAP_PASS: + type_str = "pass"; + ret_val = netlbl_cipsov4_add_pass(info); + break; + } + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + return ret_val; +} + +/** + * netlbl_cipsov4_list - Handle a LIST message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LIST message and respond accordingly. While the + * response message generated by the kernel is straightforward, determining + * before hand the size of the buffer to allocate is not (we have to generate + * the message to know the size). In order to keep this function sane what we + * do is allocate a buffer of NLMSG_GOODSIZE and try to fit the response in + * that size, if we fail then we restart with a larger buffer and try again. + * We continue in this manner until we hit a limit of failed attempts then we + * give up and just send an error message. Returns zero on success and + * negative values on error. + * + */ +static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val; + struct sk_buff *ans_skb = NULL; + u32 nlsze_mult = 1; + void *data; + u32 doi; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct cipso_v4_doi *doi_def; + u32 iter; + + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) { + ret_val = -EINVAL; + goto list_failure; + } + +list_start: + ans_skb = nlmsg_new(NLMSG_GOODSIZE * nlsze_mult, GFP_KERNEL); + if (ans_skb == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_cipsov4_gnl_family.id, + 0, + NLBL_CIPSOV4_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + ret_val = -EINVAL; + goto list_failure; + } + + ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); + if (ret_val != 0) + goto list_failure_lock; + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID; + iter++) { + ret_val = nla_put_u8(ans_skb, + NLBL_CIPSOV4_A_TAG, + doi_def->tags[iter]); + if (ret_val != 0) + goto list_failure_lock; + } + nla_nest_end(ans_skb, nla_a); + + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_failure_lock; + } + for (iter = 0; + iter < doi_def->map.std->lvl.local_size; + iter++) { + if (doi_def->map.std->lvl.local[iter] == + CIPSO_V4_INV_LVL) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSLVLREM, + doi_def->map.std->lvl.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); + if (nla_a == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + for (iter = 0; + iter < doi_def->map.std->cat.local_size; + iter++) { + if (doi_def->map.std->cat.local[iter] == + CIPSO_V4_INV_CAT) + continue; + + nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT); + if (nla_b == NULL) { + ret_val = -ENOMEM; + goto list_retry; + } + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATLOC, + iter); + if (ret_val != 0) + goto list_retry; + ret_val = nla_put_u32(ans_skb, + NLBL_CIPSOV4_A_MLSCATREM, + doi_def->map.std->cat.local[iter]); + if (ret_val != 0) + goto list_retry; + nla_nest_end(ans_skb, nla_b); + } + nla_nest_end(ans_skb, nla_a); + + break; + } + rcu_read_unlock(); + + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto list_failure; + + return 0; + +list_retry: + /* XXX - this limit is a guesstimate */ + if (nlsze_mult < 4) { + rcu_read_unlock(); + kfree_skb(ans_skb); + nlsze_mult++; + goto list_start; + } +list_failure_lock: + rcu_read_unlock(); +list_failure: + kfree_skb(ans_skb); + return ret_val; +} + +/** + * netlbl_cipsov4_listall_cb - cipso_v4_doi_walk() callback for LISTALL + * @doi_def: the CIPSOv4 DOI definition + * @arg: the netlbl_cipsov4_doiwalk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * cipso_v4_doi_walk() function for use in generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. + * + */ +static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_cipsov4_gnl_family.id, + NLM_F_MULTI, + NLBL_CIPSOV4_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_u32(cb_arg->skb, NLBL_CIPSOV4_A_DOI, doi_def->doi); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, + NLBL_CIPSOV4_A_MTYPE, + doi_def->type); + if (ret_val != 0) + goto listall_cb_failure; + + return genlmsg_end(cb_arg->skb, data); + +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); + return ret_val; +} + +/** + * netlbl_cipsov4_listall - Handle a LISTALL message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated LISTALL message and respond accordingly. Returns + * zero on success and negative values on error. + * + */ +static int netlbl_cipsov4_listall(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct netlbl_cipsov4_doiwalk_arg cb_arg; + int doi_skip = cb->args[0]; + + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; + + cipso_v4_doi_walk(&doi_skip, netlbl_cipsov4_listall_cb, &cb_arg); + + cb->args[0] = doi_skip; + return skb->len; +} + +/** + * netlbl_cipsov4_remove - Handle a REMOVE message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVE message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + u32 doi = 0; + struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; + + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) + return -EINVAL; + + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = cipso_v4_doi_remove(doi, + &audit_info, + netlbl_cipsov4_doi_free); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u res=%u", + doi, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + return ret_val; +} + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_cipsov4_genl_c_add = { + .cmd = NLBL_CIPSOV4_C_ADD, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_cipsov4_genl_policy, + .doit = netlbl_cipsov4_add, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_remove = { + .cmd = NLBL_CIPSOV4_C_REMOVE, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_cipsov4_genl_policy, + .doit = netlbl_cipsov4_remove, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_list = { + .cmd = NLBL_CIPSOV4_C_LIST, + .flags = 0, + .policy = netlbl_cipsov4_genl_policy, + .doit = netlbl_cipsov4_list, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_listall = { + .cmd = NLBL_CIPSOV4_C_LISTALL, + .flags = 0, + .policy = netlbl_cipsov4_genl_policy, + .doit = NULL, + .dumpit = netlbl_cipsov4_listall, +}; + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component + * + * Description: + * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_cipsov4_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_cipsov4_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_add); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_remove); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_list); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_listall); + if (ret_val != 0) + return ret_val; + + return 0; +} diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h new file mode 100644 index 000000000000..f03cf9b78286 --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.h @@ -0,0 +1,166 @@ +/* + * NetLabel CIPSO/IPv4 Support + * + * This file defines the CIPSO/IPv4 functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_CIPSO_V4 +#define _NETLABEL_CIPSO_V4 + +#include <net/netlabel.h> + +/* + * The following NetLabel payloads are supported by the CIPSO subsystem. + * + * o ADD: + * Sent by an application to add a new DOI mapping table. + * + * Required attributes: + * + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST + * + * If using CIPSO_V4_MAP_STD the following attributes are required: + * + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST + * + * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * + * o REMOVE: + * Sent by an application to remove a specific DOI mapping table from the + * CIPSO V4 system. + * + * Required attributes: + * + * NLBL_CIPSOV4_A_DOI + * + * o LIST: + * Sent by an application to list the details of a DOI definition. On + * success the kernel should send a response using the following format. + * + * Required attributes: + * + * NLBL_CIPSOV4_A_DOI + * + * The valid response message format depends on the type of the DOI mapping, + * the defined formats are shown below. + * + * Required attributes: + * + * NLBL_CIPSOV4_A_MTYPE + * NLBL_CIPSOV4_A_TAGLST + * + * If using CIPSO_V4_MAP_STD the following attributes are required: + * + * NLBL_CIPSOV4_A_MLSLVLLST + * NLBL_CIPSOV4_A_MLSCATLST + * + * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * + * o LISTALL: + * This message is sent by an application to list the valid DOIs on the + * system. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. + * + * Required attributes: + * + * NLBL_CIPSOV4_A_DOI + * NLBL_CIPSOV4_A_MTYPE + * + */ + +/* NetLabel CIPSOv4 commands */ +enum { + NLBL_CIPSOV4_C_UNSPEC, + NLBL_CIPSOV4_C_ADD, + NLBL_CIPSOV4_C_REMOVE, + NLBL_CIPSOV4_C_LIST, + NLBL_CIPSOV4_C_LISTALL, + __NLBL_CIPSOV4_C_MAX, +}; +#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) + +/* NetLabel CIPSOv4 attributes */ +enum { + NLBL_CIPSOV4_A_UNSPEC, + NLBL_CIPSOV4_A_DOI, + /* (NLA_U32) + * the DOI value */ + NLBL_CIPSOV4_A_MTYPE, + /* (NLA_U32) + * the mapping table type (defined in the cipso_ipv4.h header as + * CIPSO_V4_MAP_*) */ + NLBL_CIPSOV4_A_TAG, + /* (NLA_U8) + * a CIPSO tag type, meant to be used within a NLBL_CIPSOV4_A_TAGLST + * attribute */ + NLBL_CIPSOV4_A_TAGLST, + /* (NLA_NESTED) + * the CIPSO tag list for the DOI, there must be at least one + * NLBL_CIPSOV4_A_TAG attribute, tags listed first are given higher + * priorirty when sending packets */ + NLBL_CIPSOV4_A_MLSLVLLOC, + /* (NLA_U32) + * the local MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVLREM, + /* (NLA_U32) + * the remote MLS sensitivity level */ + NLBL_CIPSOV4_A_MLSLVL, + /* (NLA_NESTED) + * a MLS sensitivity level mapping, must contain only one attribute of + * each of the following types: NLBL_CIPSOV4_A_MLSLVLLOC and + * NLBL_CIPSOV4_A_MLSLVLREM */ + NLBL_CIPSOV4_A_MLSLVLLST, + /* (NLA_NESTED) + * the CIPSO level mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSLVL attribute */ + NLBL_CIPSOV4_A_MLSCATLOC, + /* (NLA_U32) + * the local MLS category */ + NLBL_CIPSOV4_A_MLSCATREM, + /* (NLA_U32) + * the remote MLS category */ + NLBL_CIPSOV4_A_MLSCAT, + /* (NLA_NESTED) + * a MLS category mapping, must contain only one attribute of each of + * the following types: NLBL_CIPSOV4_A_MLSCATLOC and + * NLBL_CIPSOV4_A_MLSCATREM */ + NLBL_CIPSOV4_A_MLSCATLST, + /* (NLA_NESTED) + * the CIPSO category mappings, there must be at least one + * NLBL_CIPSOV4_A_MLSCAT attribute */ + __NLBL_CIPSOV4_A_MAX, +}; +#define NLBL_CIPSOV4_A_MAX (__NLBL_CIPSOV4_A_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_cipsov4_genl_init(void); + +#endif diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c new file mode 100644 index 000000000000..af4371d3b459 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.c @@ -0,0 +1,448 @@ +/* + * NetLabel Domain Hash Table + * + * This file manages the domain hash table that NetLabel uses to determine + * which network labeling protocol to use for a given domain. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/audit.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> +#include <asm/bug.h> + +#include "netlabel_mgmt.h" +#include "netlabel_domainhash.h" +#include "netlabel_user.h" + +struct netlbl_domhsh_tbl { + struct list_head *tbl; + u32 size; +}; + +/* Domain hash table */ +/* XXX - updates should be so rare that having one spinlock for the entire + * hash table should be okay */ +static DEFINE_SPINLOCK(netlbl_domhsh_lock); +static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; + +/* Default domain mapping */ +static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); +static struct netlbl_dom_map *netlbl_domhsh_def = NULL; + +/* + * Domain Hash Table Helper Functions + */ + +/** + * netlbl_domhsh_free_entry - Frees a domain hash table entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a hash table entry can be released + * safely. + * + */ +static void netlbl_domhsh_free_entry(struct rcu_head *entry) +{ + struct netlbl_dom_map *ptr; + + ptr = container_of(entry, struct netlbl_dom_map, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * netlbl_domhsh_hash - Hashing function for the domain hash table + * @domain: the domain name to hash + * + * Description: + * This is the hashing function for the domain hash table, it returns the + * correct bucket number for the domain. The caller is responsibile for + * calling the rcu_read_[un]lock() functions. + * + */ +static u32 netlbl_domhsh_hash(const char *key) +{ + u32 iter; + u32 val; + u32 len; + + /* This is taken (with slight modification) from + * security/selinux/ss/symtab.c:symhash() */ + + for (iter = 0, val = 0, len = strlen(key); iter < len; iter++) + val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter]; + return val & (rcu_dereference(netlbl_domhsh)->size - 1); +} + +/** + * netlbl_domhsh_search - Search for a domain entry + * @domain: the domain + * @def: return default if no match is found + * + * Description: + * Searches the domain hash table and returns a pointer to the hash table + * entry if found, otherwise NULL is returned. If @def is non-zero and a + * match is not found in the domain hash table the default mapping is returned + * if it exists. The caller is responsibile for the rcu hash table locks + * (i.e. the caller much call rcu_read_[un]lock()). + * + */ +static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) +{ + u32 bkt; + struct netlbl_dom_map *iter; + + if (domain != NULL) { + bkt = netlbl_domhsh_hash(domain); + list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list) + if (iter->valid && strcmp(iter->domain, domain) == 0) + return iter; + } + + if (def != 0) { + iter = rcu_dereference(netlbl_domhsh_def); + if (iter != NULL && iter->valid) + return iter; + } + + return NULL; +} + +/* + * Domain Hash Table Functions + */ + +/** + * netlbl_domhsh_init - Init for the domain hash + * @size: the number of bits to use for the hash buckets + * + * Description: + * Initializes the domain hash table, should be called only by + * netlbl_user_init() during initialization. Returns zero on success, non-zero + * values on error. + * + */ +int netlbl_domhsh_init(u32 size) +{ + u32 iter; + struct netlbl_domhsh_tbl *hsh_tbl; + + if (size == 0) + return -EINVAL; + + hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); + if (hsh_tbl == NULL) + return -ENOMEM; + hsh_tbl->size = 1 << size; + hsh_tbl->tbl = kcalloc(hsh_tbl->size, + sizeof(struct list_head), + GFP_KERNEL); + if (hsh_tbl->tbl == NULL) { + kfree(hsh_tbl); + return -ENOMEM; + } + for (iter = 0; iter < hsh_tbl->size; iter++) + INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); + + rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); + rcu_assign_pointer(netlbl_domhsh, hsh_tbl); + spin_unlock(&netlbl_domhsh_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * netlbl_domhsh_add - Adds a entry to the domain hash table + * @entry: the entry to add + * @audit_info: NetLabel audit information + * + * Description: + * Adds a new entry to the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) +{ + int ret_val; + u32 bkt; + struct audit_buffer *audit_buf; + char *audit_domain; + + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = 0; + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, + entry->domain); + break; + default: + return -EINVAL; + } + if (ret_val != 0) + return ret_val; + + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + ret_val = 0; + rcu_read_lock(); + if (entry->domain != NULL) { + bkt = netlbl_domhsh_hash(entry->domain); + spin_lock(&netlbl_domhsh_lock); + if (netlbl_domhsh_search(entry->domain, 0) == NULL) + list_add_tail_rcu(&entry->list, + &netlbl_domhsh->tbl[bkt]); + else + ret_val = -EEXIST; + spin_unlock(&netlbl_domhsh_lock); + } else if (entry->domain == NULL) { + INIT_LIST_HEAD(&entry->list); + spin_lock(&netlbl_domhsh_def_lock); + if (rcu_dereference(netlbl_domhsh_def) == NULL) + rcu_assign_pointer(netlbl_domhsh_def, entry); + else + ret_val = -EEXIST; + spin_unlock(&netlbl_domhsh_def_lock); + } else + ret_val = -EINVAL; + + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + entry->type_def.cipsov4->doi); + break; + } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + rcu_read_unlock(); + + if (ret_val != 0) { + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, + entry->domain) != 0) + BUG(); + break; + } + } + + return ret_val; +} + +/** + * netlbl_domhsh_add_default - Adds the default entry to the domain hash table + * @entry: the entry to add + * @audit_info: NetLabel audit information + * + * Description: + * Adds a new default entry to the domain hash table and handles any updates + * to the lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) +{ + return netlbl_domhsh_add(entry, audit_info); +} + +/** + * netlbl_domhsh_remove - Removes an entry from the domain hash table + * @domain: the domain to remove + * @audit_info: NetLabel audit information + * + * Description: + * Removes an entry from the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +{ + int ret_val = -ENOENT; + struct netlbl_dom_map *entry; + struct audit_buffer *audit_buf; + char *audit_domain; + + rcu_read_lock(); + if (domain != NULL) + entry = netlbl_domhsh_search(domain, 0); + else + entry = netlbl_domhsh_search(domain, 1); + if (entry == NULL) + goto remove_return; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, + entry->domain); + if (ret_val != 0) + goto remove_return; + break; + } + ret_val = 0; + if (entry != rcu_dereference(netlbl_domhsh_def)) { + spin_lock(&netlbl_domhsh_lock); + if (entry->valid) { + entry->valid = 0; + list_del_rcu(&entry->list); + } else + ret_val = -ENOENT; + spin_unlock(&netlbl_domhsh_lock); + } else { + spin_lock(&netlbl_domhsh_def_lock); + if (entry->valid) { + entry->valid = 0; + rcu_assign_pointer(netlbl_domhsh_def, NULL); + } else + ret_val = -ENOENT; + spin_unlock(&netlbl_domhsh_def_lock); + } + + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); + audit_log_format(audit_buf, + " nlbl_domain=%s res=%u", + audit_domain, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + +remove_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_domhsh_remove_default - Removes the default entry from the table + * @audit_info: NetLabel audit information + * + * Description: + * Removes/resets the default entry for the domain hash table and handles any + * updates to the lower level protocol handler (i.e. CIPSO). Returns zero on + * success, non-zero on failure. + * + */ +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) +{ + return netlbl_domhsh_remove(NULL, audit_info); +} + +/** + * netlbl_domhsh_getentry - Get an entry from the domain hash table + * @domain: the domain name to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain, + * return a pointer to a copy of the entry or NULL. The caller is responsibile + * for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) +{ + return netlbl_domhsh_search(domain, 1); +} + +/** + * netlbl_domhsh_walk - Iterate through the domain mapping hash table + * @skip_bkt: the number of buckets to skip at the start + * @skip_chain: the number of entries to skip in the first iterated bucket + * @callback: callback for each entry + * @cb_arg: argument for the callback function + * + * Description: + * Interate over the domain mapping hash table, skipping the first @skip_bkt + * buckets and @skip_chain entries. For each entry in the table call + * @callback, if @callback returns a negative value stop 'walking' through the + * table and return. Updates the values in @skip_bkt and @skip_chain on + * return. Returns zero on succcess, negative values on failure. + * + */ +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg) +{ + int ret_val = -ENOENT; + u32 iter_bkt; + struct netlbl_dom_map *iter_entry; + u32 chain_cnt = 0; + + rcu_read_lock(); + for (iter_bkt = *skip_bkt; + iter_bkt < rcu_dereference(netlbl_domhsh)->size; + iter_bkt++, chain_cnt = 0) { + list_for_each_entry_rcu(iter_entry, + &netlbl_domhsh->tbl[iter_bkt], + list) + if (iter_entry->valid) { + if (chain_cnt++ < *skip_chain) + continue; + ret_val = callback(iter_entry, cb_arg); + if (ret_val < 0) { + chain_cnt--; + goto walk_return; + } + } + } + +walk_return: + rcu_read_unlock(); + *skip_bkt = iter_bkt; + *skip_chain = chain_cnt; + return ret_val; +} diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h new file mode 100644 index 000000000000..3689956c3436 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.h @@ -0,0 +1,71 @@ +/* + * NetLabel Domain Hash Table + * + * This file manages the domain hash table that NetLabel uses to determine + * which network labeling protocol to use for a given domain. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_DOMAINHASH_H +#define _NETLABEL_DOMAINHASH_H + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> + +/* Domain hash table size */ +/* XXX - currently this number is an uneducated guess */ +#define NETLBL_DOMHSH_BITSIZE 7 + +/* Domain mapping definition struct */ +struct netlbl_dom_map { + char *domain; + u32 type; + union { + struct cipso_v4_doi *cipsov4; + } type_def; + + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* init function */ +int netlbl_domhsh_init(u32 size); + +/* Manipulate the domain hash table */ +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); +struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg); + +#endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c new file mode 100644 index 000000000000..54fb7de3c2b1 --- /dev/null +++ b/net/netlabel/netlabel_kapi.c @@ -0,0 +1,254 @@ +/* + * NetLabel Kernel API + * + * This file defines the kernel API for the NetLabel system. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <net/ip.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> +#include <asm/bug.h> + +#include "netlabel_domainhash.h" +#include "netlabel_unlabeled.h" +#include "netlabel_user.h" + +/* + * LSM Functions + */ + +/** + * netlbl_socket_setattr - Label a socket using the correct protocol + * @sock: the socket to label + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given socket using the security attributes + * specified in @secattr. This function requires exclusive access to + * @sock->sk, which means it either needs to be in the process of being + * created or locked via lock_sock(sock->sk). Returns zero on success, + * negative values on failure. + * + */ +int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOENT; + struct netlbl_dom_map *dom_entry; + + rcu_read_lock(); + dom_entry = netlbl_domhsh_getentry(secattr->domain); + if (dom_entry == NULL) + goto socket_setattr_return; + switch (dom_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_socket_setattr(sock, + dom_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + +socket_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_sock_getattr - Determine the security attributes of a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Examines the given sock to see any NetLabel style labeling has been + * applied to the sock, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure. + * + */ +int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_sock_getattr(sk, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** + * netlbl_socket_getattr - Determine the security attributes of a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Examines the given socket to see any NetLabel style labeling has been + * applied to the socket, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure. + * + */ +int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_socket_getattr(sock, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** + * netlbl_skbuff_getattr - Determine the security attributes of a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Examines the given packet to see if a recognized form of packet labeling + * is present, if so it parses the packet label and returns the security + * attributes in @secattr. Returns zero on success, negative values on + * failure. + * + */ +int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_skbuff_getattr(skb, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** + * netlbl_skbuff_err - Handle a LSM error on a sk_buff + * @skb: the packet + * @error: the error code + * + * Description: + * Deal with a LSM problem when handling the packet in @skb, typically this is + * a permission denied problem (-EACCES). The correct action is determined + * according to the packet's labeling protocol. + * + */ +void netlbl_skbuff_err(struct sk_buff *skb, int error) +{ + if (CIPSO_V4_OPTEXIST(skb)) + cipso_v4_error(skb, error, 0); +} + +/** + * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches + * + * Description: + * For all of the NetLabel protocols that support some form of label mapping + * cache, invalidate the cache. Returns zero on success, negative values on + * error. + * + */ +void netlbl_cache_invalidate(void) +{ + cipso_v4_cache_invalidate(); +} + +/** + * netlbl_cache_add - Add an entry to a NetLabel protocol cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add the LSM security attributes for the given packet to the underlying + * NetLabel protocol's label mapping cache. Returns zero on success, negative + * values on error. + * + */ +int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + if (secattr->cache.data == NULL) + return -ENOMSG; + + if (CIPSO_V4_OPTEXIST(skb)) + return cipso_v4_cache_add(skb, secattr); + + return -ENOMSG; +} + +/* + * Setup Functions + */ + +/** + * netlbl_init - Initialize NetLabel + * + * Description: + * Perform the required NetLabel initialization before first use. + * + */ +static int __init netlbl_init(void) +{ + int ret_val; + + printk(KERN_INFO "NetLabel: Initializing\n"); + printk(KERN_INFO "NetLabel: domain hash size = %u\n", + (1 << NETLBL_DOMHSH_BITSIZE)); + printk(KERN_INFO "NetLabel: protocols =" + " UNLABELED" + " CIPSOv4" + "\n"); + + ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE); + if (ret_val != 0) + goto init_failure; + + ret_val = netlbl_netlink_init(); + if (ret_val != 0) + goto init_failure; + + ret_val = netlbl_unlabel_defconf(); + if (ret_val != 0) + goto init_failure; + printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n"); + + return 0; + +init_failure: + panic("NetLabel: failed to initialize properly (%d)\n", ret_val); +} + +subsys_initcall(netlbl_init); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c new file mode 100644 index 000000000000..53c9079ad2c3 --- /dev/null +++ b/net/netlabel/netlabel_mgmt.c @@ -0,0 +1,648 @@ +/* + * NetLabel Management Support + * + * This file defines the management functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> + +#include "netlabel_domainhash.h" +#include "netlabel_user.h" +#include "netlabel_mgmt.h" + +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_mgmt_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_MGMT_A_MAX, +}; + +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { + [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, + [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, + [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, + [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 }, +}; + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_mgmt_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADD message and add the domains from the message + * to the hash table. See netlabel.h for a description of the message format. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct netlbl_dom_map *entry = NULL; + size_t tmp_size; + u32 tmp_val; + struct netlbl_audit audit_info; + + if (!info->attrs[NLBL_MGMT_A_DOMAIN] || + !info->attrs[NLBL_MGMT_A_PROTOCOL]) + goto add_failure; + + netlbl_netlink_auditinfo(skb, &audit_info); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add(entry, &audit_info); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) + goto add_failure; + + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. */ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { + rcu_read_unlock(); + goto add_failure; + } + ret_val = netlbl_domhsh_add(entry, &audit_info); + rcu_read_unlock(); + break; + default: + goto add_failure; + } + if (ret_val != 0) + goto add_failure; + + return 0; + +add_failure: + if (entry) + kfree(entry->domain); + kfree(entry); + return ret_val; +} + +/** + * netlbl_mgmt_remove - Handle a REMOVE message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVE message and remove the specified domain + * mappings. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) +{ + char *domain; + struct netlbl_audit audit_info; + + if (!info->attrs[NLBL_MGMT_A_DOMAIN]) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); + return netlbl_domhsh_remove(domain, &audit_info); +} + +/** + * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL + * @entry: the domain mapping hash table entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is designed to be used as a callback to the + * netlbl_domhsh_walk() function for use in generating a response for a LISTALL + * message. Returns the size of the message on success, negative values on + * failure. + * + */ +static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_domhsh_walk_arg *cb_arg = arg; + void *data; + + data = netlbl_netlink_hdr_put(cb_arg->skb, + NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_LISTALL); + if (data == NULL) + goto listall_cb_failure; + + ret_val = nla_put_string(cb_arg->skb, + NLBL_MGMT_A_DOMAIN, + entry->domain); + if (ret_val != 0) + goto listall_cb_failure; + ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + goto listall_cb_failure; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(cb_arg->skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + if (ret_val != 0) + goto listall_cb_failure; + break; + } + + cb_arg->seq++; + return genlmsg_end(cb_arg->skb, data); + +listall_cb_failure: + genlmsg_cancel(cb_arg->skb, data); + return ret_val; +} + +/** + * netlbl_mgmt_listall - Handle a LISTALL message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated LISTALL message and dumps the domain hash table in + * a form suitable for use in a kernel generated LISTALL message. Returns zero + * on success, negative values on failure. + * + */ +static int netlbl_mgmt_listall(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct netlbl_domhsh_walk_arg cb_arg; + u32 skip_bkt = cb->args[0]; + u32 skip_chain = cb->args[1]; + + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; + + netlbl_domhsh_walk(&skip_bkt, + &skip_chain, + netlbl_mgmt_listall_cb, + &cb_arg); + + cb->args[0] = skip_bkt; + cb->args[1] = skip_chain; + return skb->len; +} + +/** + * netlbl_mgmt_adddef - Handle an ADDDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADDDEF message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct netlbl_dom_map *entry = NULL; + u32 tmp_val; + struct netlbl_audit audit_info; + + if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) + goto adddef_failure; + + netlbl_netlink_auditinfo(skb, &audit_info); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto adddef_failure; + } + entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add_default(entry, &audit_info); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (!info->attrs[NLBL_MGMT_A_CV4DOI]) + goto adddef_failure; + + tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); + /* We should be holding a rcu_read_lock() here while we hold + * the result but since the entry will always be deleted when + * the CIPSO DOI is deleted we aren't going to keep the + * lock. */ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { + rcu_read_unlock(); + goto adddef_failure; + } + ret_val = netlbl_domhsh_add_default(entry, &audit_info); + rcu_read_unlock(); + break; + default: + goto adddef_failure; + } + if (ret_val != 0) + goto adddef_failure; + + return 0; + +adddef_failure: + kfree(entry); + return ret_val; +} + +/** + * netlbl_mgmt_removedef - Handle a REMOVEDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVEDEF message and remove the default domain + * mapping. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) +{ + struct netlbl_audit audit_info; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_domhsh_remove_default(&audit_info); +} + +/** + * netlbl_mgmt_listdef - Handle a LISTDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LISTDEF message and dumps the default domain + * mapping in a form suitable for use in a kernel generated LISTDEF message. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb = NULL; + void *data; + struct netlbl_dom_map *entry; + + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (ans_skb == NULL) + return -ENOMEM; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_LISTDEF); + if (data == NULL) + goto listdef_failure; + + rcu_read_lock(); + entry = netlbl_domhsh_getentry(NULL); + if (entry == NULL) { + ret_val = -ENOENT; + goto listdef_failure_lock; + } + ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + goto listdef_failure_lock; + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + if (ret_val != 0) + goto listdef_failure_lock; + break; + } + rcu_read_unlock(); + + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto listdef_failure; + return 0; + +listdef_failure_lock: + rcu_read_unlock(); +listdef_failure: + kfree_skb(ans_skb); + return ret_val; +} + +/** + * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response + * @skb: the skb to write to + * @seq: the NETLINK sequence number + * @cb: the NETLINK callback + * @protocol: the NetLabel protocol to use in the message + * + * Description: + * This function is to be used in conjunction with netlbl_mgmt_protocols() to + * answer a application's PROTOCOLS message. Returns the size of the message + * on success, negative values on failure. + * + */ +static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, + struct netlink_callback *cb, + u32 protocol) +{ + int ret_val = -ENOMEM; + void *data; + + data = netlbl_netlink_hdr_put(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + netlbl_mgmt_gnl_family.id, + NLM_F_MULTI, + NLBL_MGMT_C_PROTOCOLS); + if (data == NULL) + goto protocols_cb_failure; + + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol); + if (ret_val != 0) + goto protocols_cb_failure; + + return genlmsg_end(skb, data); + +protocols_cb_failure: + genlmsg_cancel(skb, data); + return ret_val; +} + +/** + * netlbl_mgmt_protocols - Handle a PROTOCOLS message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated PROTOCOLS message and respond accordingly. + * + */ +static int netlbl_mgmt_protocols(struct sk_buff *skb, + struct netlink_callback *cb) +{ + u32 protos_sent = cb->args[0]; + + if (protos_sent == 0) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_UNLABELED) < 0) + goto protocols_return; + protos_sent++; + } + if (protos_sent == 1) { + if (netlbl_mgmt_protocols_cb(skb, + cb, + NETLBL_NLTYPE_CIPSOV4) < 0) + goto protocols_return; + protos_sent++; + } + +protocols_return: + cb->args[0] = protos_sent; + return skb->len; +} + +/** + * netlbl_mgmt_version - Handle a VERSION message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated VERSION message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb = NULL; + void *data; + + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (ans_skb == NULL) + return -ENOMEM; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_mgmt_gnl_family.id, + 0, + NLBL_MGMT_C_VERSION); + if (data == NULL) + goto version_failure; + + ret_val = nla_put_u32(ans_skb, + NLBL_MGMT_A_VERSION, + NETLBL_PROTO_VERSION); + if (ret_val != 0) + goto version_failure; + + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto version_failure; + return 0; + +version_failure: + kfree_skb(ans_skb); + return ret_val; +} + + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_mgmt_genl_c_add = { + .cmd = NLBL_MGMT_C_ADD, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_add, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_remove = { + .cmd = NLBL_MGMT_C_REMOVE, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_remove, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_listall = { + .cmd = NLBL_MGMT_C_LISTALL, + .flags = 0, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = netlbl_mgmt_listall, +}; + +static struct genl_ops netlbl_mgmt_genl_c_adddef = { + .cmd = NLBL_MGMT_C_ADDDEF, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_adddef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_removedef = { + .cmd = NLBL_MGMT_C_REMOVEDEF, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_removedef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_listdef = { + .cmd = NLBL_MGMT_C_LISTDEF, + .flags = 0, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_listdef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_protocols = { + .cmd = NLBL_MGMT_C_PROTOCOLS, + .flags = 0, + .policy = netlbl_mgmt_genl_policy, + .doit = NULL, + .dumpit = netlbl_mgmt_protocols, +}; + +static struct genl_ops netlbl_mgmt_genl_c_version = { + .cmd = NLBL_MGMT_C_VERSION, + .flags = 0, + .policy = netlbl_mgmt_genl_policy, + .doit = netlbl_mgmt_version, + .dumpit = NULL, +}; + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_mgmt_genl_init - Register the NetLabel management component + * + * Description: + * Register the NetLabel management component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_mgmt_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_mgmt_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_add); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_remove); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_listall); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_adddef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_removedef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_listdef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_protocols); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_version); + if (ret_val != 0) + return ret_val; + + return 0; +} diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h new file mode 100644 index 000000000000..3642d3bfc8eb --- /dev/null +++ b/net/netlabel/netlabel_mgmt.h @@ -0,0 +1,171 @@ +/* + * NetLabel Management Support + * + * This file defines the management functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_MGMT_H +#define _NETLABEL_MGMT_H + +#include <net/netlabel.h> + +/* + * The following NetLabel payloads are supported by the management interface. + * + * o ADD: + * Sent by an application to add a domain mapping to the NetLabel system. + * + * Required attributes: + * + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL + * + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * + * NLBL_MGMT_A_CV4DOI + * + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * + * o REMOVE: + * Sent by an application to remove a domain mapping from the NetLabel + * system. + * + * Required attributes: + * + * NLBL_MGMT_A_DOMAIN + * + * o LISTALL: + * This message can be sent either from an application or by the kernel in + * response to an application generated LISTALL message. When sent by an + * application there is no payload and the NLM_F_DUMP flag should be set. + * The kernel should respond with a series of the following messages. + * + * Required attributes: + * + * NLBL_MGMT_A_DOMAIN + * NLBL_MGMT_A_PROTOCOL + * + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * + * NLBL_MGMT_A_CV4DOI + * + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * + * o ADDDEF: + * Sent by an application to set the default domain mapping for the NetLabel + * system. + * + * Required attributes: + * + * NLBL_MGMT_A_PROTOCOL + * + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * + * NLBL_MGMT_A_CV4DOI + * + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * + * o REMOVEDEF: + * Sent by an application to remove the default domain mapping from the + * NetLabel system, there is no payload. + * + * o LISTDEF: + * This message can be sent either from an application or by the kernel in + * response to an application generated LISTDEF message. When sent by an + * application there is no payload. On success the kernel should send a + * response using the following format. + * + * Required attributes: + * + * NLBL_MGMT_A_PROTOCOL + * + * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * + * NLBL_MGMT_A_CV4DOI + * + * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * + * o PROTOCOLS: + * Sent by an application to request a list of configured NetLabel protocols + * in the kernel. When sent by an application there is no payload and the + * NLM_F_DUMP flag should be set. The kernel should respond with a series of + * the following messages. + * + * Required attributes: + * + * NLBL_MGMT_A_PROTOCOL + * + * o VERSION: + * Sent by an application to request the NetLabel version. When sent by an + * application there is no payload. This message type is also used by the + * kernel to respond to an VERSION request. + * + * Required attributes: + * + * NLBL_MGMT_A_VERSION + * + */ + +/* NetLabel Management commands */ +enum { + NLBL_MGMT_C_UNSPEC, + NLBL_MGMT_C_ADD, + NLBL_MGMT_C_REMOVE, + NLBL_MGMT_C_LISTALL, + NLBL_MGMT_C_ADDDEF, + NLBL_MGMT_C_REMOVEDEF, + NLBL_MGMT_C_LISTDEF, + NLBL_MGMT_C_PROTOCOLS, + NLBL_MGMT_C_VERSION, + __NLBL_MGMT_C_MAX, +}; +#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) + +/* NetLabel Management attributes */ +enum { + NLBL_MGMT_A_UNSPEC, + NLBL_MGMT_A_DOMAIN, + /* (NLA_NUL_STRING) + * the NULL terminated LSM domain string */ + NLBL_MGMT_A_PROTOCOL, + /* (NLA_U32) + * the NetLabel protocol type (defined by NETLBL_NLTYPE_*) */ + NLBL_MGMT_A_VERSION, + /* (NLA_U32) + * the NetLabel protocol version number (defined by + * NETLBL_PROTO_VERSION) */ + NLBL_MGMT_A_CV4DOI, + /* (NLA_U32) + * the CIPSOv4 DOI value */ + __NLBL_MGMT_A_MAX, +}; +#define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_mgmt_genl_init(void); + +#endif diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c new file mode 100644 index 000000000000..1833ad233b39 --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.c @@ -0,0 +1,280 @@ +/* + * NetLabel Unlabeled Support + * + * This file defines functions for dealing with unlabeled packets for the + * NetLabel system. The NetLabel system manages static and dynamic label + * mappings for network protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/socket.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <net/netlabel.h> +#include <asm/bug.h> + +#include "netlabel_user.h" +#include "netlabel_domainhash.h" +#include "netlabel_unlabeled.h" + +/* Accept unlabeled packets flag */ +static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0); + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_unlabel_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_UNLABEL_A_MAX, +}; + +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { + [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, +}; + +/* + * Helper Functions + */ + +/** + * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag + * @value: desired value + * @audit_info: NetLabel audit information + * + * Description: + * Set the value of the unlabeled accept flag to @value. + * + */ +static void netlbl_unlabel_acceptflg_set(u8 value, + struct netlbl_audit *audit_info) +{ + struct audit_buffer *audit_buf; + u8 old_val; + + old_val = atomic_read(&netlabel_unlabel_accept_flg); + atomic_set(&netlabel_unlabel_accept_flg, value); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, + audit_info); + audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); + audit_log_end(audit_buf); +} + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_unlabel_accept - Handle an ACCEPT message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ACCEPT message and set the accept flag accordingly. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) +{ + u8 value; + struct netlbl_audit audit_info; + + if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { + value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); + if (value == 1 || value == 0) { + netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_unlabel_acceptflg_set(value, &audit_info); + return 0; + } + } + + return -EINVAL; +} + +/** + * netlbl_unlabel_list - Handle a LIST message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LIST message and respond with the current status. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct sk_buff *ans_skb; + void *data; + + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (ans_skb == NULL) + goto list_failure; + data = netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + info->snd_seq, + netlbl_unlabel_gnl_family.id, + 0, + NLBL_UNLABEL_C_LIST); + if (data == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + + ret_val = nla_put_u8(ans_skb, + NLBL_UNLABEL_A_ACPTFLG, + atomic_read(&netlabel_unlabel_accept_flg)); + if (ret_val != 0) + goto list_failure; + + genlmsg_end(ans_skb, data); + + ret_val = genlmsg_unicast(ans_skb, info->snd_pid); + if (ret_val != 0) + goto list_failure; + return 0; + +list_failure: + kfree(ans_skb); + return ret_val; +} + + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_unlabel_genl_c_accept = { + .cmd = NLBL_UNLABEL_C_ACCEPT, + .flags = GENL_ADMIN_PERM, + .policy = netlbl_unlabel_genl_policy, + .doit = netlbl_unlabel_accept, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_unlabel_genl_c_list = { + .cmd = NLBL_UNLABEL_C_LIST, + .flags = 0, + .policy = netlbl_unlabel_genl_policy, + .doit = netlbl_unlabel_list, + .dumpit = NULL, +}; + + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component + * + * Description: + * Register the unlabeled packet NetLabel component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_unlabel_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_unlabel_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_accept); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_list); + if (ret_val != 0) + return ret_val; + + return 0; +} + +/* + * NetLabel KAPI Hooks + */ + +/** + * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet + * @secattr: the security attributes + * + * Description: + * Determine the security attributes, if any, for an unlabled packet and return + * them in @secattr. Returns zero on success and negative values on failure. + * + */ +int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) +{ + if (atomic_read(&netlabel_unlabel_accept_flg) == 1) + return netlbl_secattr_init(secattr); + + return -ENOMSG; +} + +/** + * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets + * + * Description: + * Set the default NetLabel configuration to allow incoming unlabeled packets + * and to send unlabeled network traffic by default. + * + */ +int netlbl_unlabel_defconf(void) +{ + int ret_val; + struct netlbl_dom_map *entry; + struct netlbl_audit audit_info; + + /* Only the kernel is allowed to call this function and the only time + * it is called is at bootup before the audit subsystem is reporting + * messages so don't worry to much about these values. */ + security_task_getsecid(current, &audit_info.secid); + audit_info.loginuid = 0; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) + return -ENOMEM; + entry->type = NETLBL_NLTYPE_UNLABELED; + ret_val = netlbl_domhsh_add_default(entry, &audit_info); + if (ret_val != 0) + return ret_val; + + netlbl_unlabel_acceptflg_set(1, &audit_info); + + return 0; +} diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h new file mode 100644 index 000000000000..c2917fbb42cf --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.h @@ -0,0 +1,89 @@ +/* + * NetLabel Unlabeled Support + * + * This file defines functions for dealing with unlabeled packets for the + * NetLabel system. The NetLabel system manages static and dynamic label + * mappings for network protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_UNLABELED_H +#define _NETLABEL_UNLABELED_H + +#include <net/netlabel.h> + +/* + * The following NetLabel payloads are supported by the Unlabeled subsystem. + * + * o ACCEPT + * This message is sent from an application to specify if the kernel should + * allow unlabled packets to pass if they do not match any of the static + * mappings defined in the unlabeled module. + * + * Required attributes: + * + * NLBL_UNLABEL_A_ACPTFLG + * + * o LIST + * This message can be sent either from an application or by the kernel in + * response to an application generated LIST message. When sent by an + * application there is no payload. The kernel should respond to a LIST + * message with a LIST message on success. + * + * Required attributes: + * + * NLBL_UNLABEL_A_ACPTFLG + * + */ + +/* NetLabel Unlabeled commands */ +enum { + NLBL_UNLABEL_C_UNSPEC, + NLBL_UNLABEL_C_ACCEPT, + NLBL_UNLABEL_C_LIST, + __NLBL_UNLABEL_C_MAX, +}; +#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) + +/* NetLabel Unlabeled attributes */ +enum { + NLBL_UNLABEL_A_UNSPEC, + NLBL_UNLABEL_A_ACPTFLG, + /* (NLA_U8) + * if true then unlabeled packets are allowed to pass, else unlabeled + * packets are rejected */ + __NLBL_UNLABEL_A_MAX, +}; +#define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_unlabel_genl_init(void); + +/* Process Unlabeled incoming network packets */ +int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr); + +/* Set the default configuration to allow Unlabeled packets */ +int netlbl_unlabel_defconf(void); + +#endif diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c new file mode 100644 index 000000000000..98a416381e61 --- /dev/null +++ b/net/netlabel/netlabel_user.c @@ -0,0 +1,117 @@ +/* + * NetLabel NETLINK Interface + * + * This file defines the NETLINK interface for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/socket.h> +#include <linux/audit.h> +#include <linux/tty.h> +#include <linux/security.h> +#include <net/sock.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/netlabel.h> +#include <asm/bug.h> + +#include "netlabel_mgmt.h" +#include "netlabel_unlabeled.h" +#include "netlabel_cipso_v4.h" +#include "netlabel_user.h" + +/* + * NetLabel NETLINK Setup Functions + */ + +/** + * netlbl_netlink_init - Initialize the NETLINK communication channel + * + * Description: + * Call out to the NetLabel components so they can register their families and + * commands with the Generic NETLINK mechanism. Returns zero on success and + * non-zero on failure. + * + */ +int netlbl_netlink_init(void) +{ + int ret_val; + + ret_val = netlbl_mgmt_genl_init(); + if (ret_val != 0) + return ret_val; + + ret_val = netlbl_cipsov4_genl_init(); + if (ret_val != 0) + return ret_val; + + ret_val = netlbl_unlabel_genl_init(); + if (ret_val != 0) + return ret_val; + + return 0; +} + +/* + * NetLabel Audit Functions + */ + +/** + * netlbl_audit_start_common - Start an audit message + * @type: audit message type + * @audit_info: NetLabel audit information + * + * Description: + * Start an audit message using the type specified in @type and fill the audit + * message with some fields common to all NetLabel audit messages. Returns + * a pointer to the audit buffer on success, NULL on failure. + * + */ +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info) +{ + struct audit_context *audit_ctx = current->audit_context; + struct audit_buffer *audit_buf; + char *secctx; + u32 secctx_len; + + audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type); + if (audit_buf == NULL) + return NULL; + + audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid); + + if (audit_info->secid != 0 && + security_secid_to_secctx(audit_info->secid, + &secctx, + &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + + return audit_buf; +} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h new file mode 100644 index 000000000000..47967ef32964 --- /dev/null +++ b/net/netlabel/netlabel_user.h @@ -0,0 +1,96 @@ +/* + * NetLabel NETLINK Interface + * + * This file defines the NETLINK interface for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_USER_H +#define _NETLABEL_USER_H + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/capability.h> +#include <linux/audit.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/netlabel.h> + +/* NetLabel NETLINK helper functions */ + +/** + * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff + * @skb: the packet + * @pid: the PID of the receipient + * @seq: the sequence number + * @type: the generic NETLINK message family type + * @cmd: command + * + * Description: + * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr + * struct to the packet. Returns a pointer to the start of the payload buffer + * on success or NULL on failure. + * + */ +static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, + u32 pid, + u32 seq, + int type, + int flags, + u8 cmd) +{ + return genlmsg_put(skb, + pid, + seq, + type, + 0, + flags, + cmd, + NETLBL_PROTO_VERSION); +} + +/** + * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg + * @skb: the packet + * @audit_info: NetLabel audit information + */ +static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, + struct netlbl_audit *audit_info) +{ + audit_info->secid = NETLINK_CB(skb).sid; + audit_info->loginuid = NETLINK_CB(skb).loginuid; +} + +/* NetLabel NETLINK I/O functions */ + +int netlbl_netlink_init(void); + +/* NetLabel Audit Functions */ + +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info); + +#endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3862e73d14d7..d56e0d21f919 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -21,7 +21,6 @@ * mandatory if CONFIG_NET=y these days */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> @@ -157,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk) static void netlink_table_grab(void) { - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); if (atomic_read(&nl_table_users)) { DECLARE_WAITQUEUE(wait, current); @@ -167,9 +166,9 @@ static void netlink_table_grab(void) set_current_state(TASK_UNINTERRUPTIBLE); if (atomic_read(&nl_table_users) == 0) break; - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); schedule(); - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); } __set_current_state(TASK_RUNNING); @@ -179,7 +178,7 @@ static void netlink_table_grab(void) static __inline__ void netlink_table_ungrab(void) { - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); wake_up(&nl_table_wait); } @@ -563,10 +562,9 @@ static int netlink_alloc_groups(struct sock *sk) if (err) return err; - nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); + nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL); if (nlk->groups == NULL) return -ENOMEM; - memset(nlk->groups, 0, NLGRPSZ(groups)); nlk->ngroups = groups; return 0; } @@ -1149,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = nlmsg_new(len, GFP_KERNEL); if (skb==NULL) goto out; @@ -1275,8 +1273,7 @@ netlink_kernel_create(int unit, unsigned int groups, struct netlink_sock *nlk; unsigned long *listeners = NULL; - if (!nl_table) - return NULL; + BUG_ON(!nl_table); if (unit<0 || unit>=MAX_LINKS) return NULL; @@ -1344,19 +1341,18 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb; struct nlmsghdr *nlh; - int len; + int len, err = -ENOBUFS; skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); if (!skb) - return -ENOBUFS; + goto errout; spin_lock(&nlk->cb_lock); cb = nlk->cb; if (cb == NULL) { - spin_unlock(&nlk->cb_lock); - kfree_skb(skb); - return -EINVAL; + err = -EINVAL; + goto errout_skb; } len = cb->dump(skb, cb); @@ -1368,8 +1364,12 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); + if (!nlh) + goto errout_skb; + + memcpy(nlmsg_data(nlh), &len, sizeof(len)); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -1381,8 +1381,11 @@ static int netlink_dump(struct sock *sk) netlink_destroy_callback(cb); return 0; -nlmsg_failure: - return -ENOBUFS; +errout_skb: + spin_unlock(&nlk->cb_lock); + kfree_skb(skb); +errout: + return err; } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -1394,11 +1397,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct sock *sk; struct netlink_sock *nlk; - cb = kmalloc(sizeof(*cb), GFP_KERNEL); + cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (cb == NULL) return -ENOBUFS; - memset(cb, 0, sizeof(*cb)); cb->dump = dump; cb->done = done; cb->nlh = nlh; @@ -1435,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) int size; if (err == 0) - size = NLMSG_SPACE(sizeof(struct nlmsgerr)); + size = nlmsg_total_size(sizeof(*errmsg)); else - size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); + size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh)); - skb = alloc_skb(size, GFP_KERNEL); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -1455,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, NLMSG_ERROR, sizeof(struct nlmsgerr), 0); - errmsg = NLMSG_DATA(rep); + errmsg = nlmsg_data(rep); errmsg->error = err; - memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); + memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) { - unsigned int total_len; struct nlmsghdr *nlh; int err; @@ -1474,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return 0; - total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); - if (cb(skb, nlh, &err) < 0) { /* Not an error, but we have to interrupt processing * here. Note: that in this case we do not pull @@ -1487,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, } else if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); - skb_pull(skb, total_len); + netlink_queue_skip(nlh, skb); } return 0; @@ -1550,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) skb_pull(skb, msglen); } +/** + * nlmsg_notify - send a notification netlink message + * @sk: netlink socket to use + * @skb: notification message + * @pid: destination netlink pid for reports or 0 + * @group: destination multicast group or 0 + * @report: 1 to report back, 0 to disable + * @flags: allocation flags + */ +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, + unsigned int group, int report, gfp_t flags) +{ + int err = 0; + + if (group) { + int exclude_pid = 0; + + if (report) { + atomic_inc(&skb->users); + exclude_pid = pid; + } + + /* errors reported via destination sk->sk_err */ + nlmsg_multicast(sk, skb, exclude_pid, group, flags); + } + + if (report) + err = nlmsg_unicast(sk, skb, pid); + + return err; +} + #ifdef CONFIG_PROC_FS struct nl_seq_iter { int link; @@ -1669,7 +1700,7 @@ static int netlink_seq_open(struct inode *inode, struct file *file) struct nl_seq_iter *iter; int err; - iter = kmalloc(sizeof(*iter), GFP_KERNEL); + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; @@ -1679,7 +1710,6 @@ static int netlink_seq_open(struct inode *inode, struct file *file) return err; } - memset(iter, 0, sizeof(*iter)); seq = file->private_data; seq->private = iter; return 0; @@ -1732,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; -extern void netlink_skb_parms_too_large(void); - static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1745,17 +1773,11 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) - netlink_skb_parms_too_large(); - - nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL); - if (!nl_table) { -enomem: - printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n"); - return -ENOMEM; - } + BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); - memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS); + nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); + if (!nl_table) + goto panic; if (num_physpages >= (128 * 1024)) max = num_physpages >> (21 - PAGE_SHIFT); @@ -1775,7 +1797,7 @@ enomem: nl_pid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); - goto enomem; + goto panic; } memset(hash->table, 0, 1 * sizeof(*hash->table)); hash->max_shift = order; @@ -1792,6 +1814,8 @@ enomem: rtnetlink_init(); out: return err; +panic: + panic("netlink_init: Cannot allocate nl_table\n"); } core_initcall(netlink_proto_init); @@ -1807,4 +1831,4 @@ EXPORT_SYMBOL(netlink_set_err); EXPORT_SYMBOL(netlink_set_nonroot); EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); - +EXPORT_SYMBOL(nlmsg_notify); diff --git a/net/netlink/attr.c b/net/netlink/attr.c index fffef4ab276f..004139557e09 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -5,7 +5,6 @@ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -21,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), - [NLA_STRING] = 1, [NLA_NESTED] = NLA_HDRLEN, }; @@ -29,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, struct nla_policy *policy) { struct nla_policy *pt; - int minlen = 0; + int minlen = 0, attrlen = nla_len(nla); if (nla->nla_type <= 0 || nla->nla_type > maxtype) return 0; @@ -38,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - if (pt->minlen) - minlen = pt->minlen; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; - if (pt->type == NLA_FLAG && nla_len(nla) > 0) - return -ERANGE; + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; - if (nla_len(nla) < minlen) - return -ERANGE; + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } return 0; } @@ -256,6 +284,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + +/** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type @@ -276,6 +324,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** + * nla_reserve - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @len: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + +/** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type @@ -294,6 +360,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, memcpy(nla_data(nla), data, attrlen); } +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} /** * nla_put - Add a netlink attribute to a socket buffer @@ -314,15 +396,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) return 0; } +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -1; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} EXPORT_SYMBOL(nla_validate); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); EXPORT_SYMBOL(nla_memcpy); EXPORT_SYMBOL(nla_memcmp); EXPORT_SYMBOL(nla_strcmp); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f329b72578f5..49bc2db7982b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -5,7 +5,6 @@ * Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -320,7 +319,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) { + if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) { err = -EPERM; goto errout; } @@ -388,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len) static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { + struct nlattr *nla_ops; + struct genl_ops *ops; void *hdr; + int idx = 1; hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, family->version); @@ -397,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + if (ops->policy) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); + + if (ops->doit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); + + if (ops->dumpit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); return genlmsg_end(skb, hdr); @@ -412,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + if (chains_to_skip != 0) + genl_lock(); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -429,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: + if (chains_to_skip != 0) + genl_unlock(); + cb->args[0] = i; cb->args[1] = n; @@ -441,7 +480,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE); + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); @@ -456,7 +495,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, - [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, + [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, + .len = GENL_NAMSIZ - 1 }, }; static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) @@ -471,12 +511,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { - char name[GENL_NAMSIZ]; - - if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], - GENL_NAMSIZ) >= GENL_NAMSIZ) - goto errout; + char *name; + name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); } @@ -511,7 +548,7 @@ static int genl_ctrl_event(int event, void *data) if (IS_ERR(msg)) return PTR_ERR(msg); - genlmsg_multicast(msg, 0, GENL_ID_CTRL); + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); break; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3669cb953e6e..1d50f801f181 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -8,7 +8,6 @@ * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/capability.h> @@ -67,6 +66,14 @@ static DEFINE_SPINLOCK(nr_list_lock); static const struct proto_ops nr_proto_ops; /* + * NETROM network devices are virtual network devices encapsulating NETROM + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key nr_netdev_xmit_lock_key; + +/* * Socket removal during an interrupt is now safe. */ static void nr_remove_socket(struct sock *sk) @@ -801,7 +808,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) /* Now attach up the new socket */ kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); newsock->sk = newsk; out: @@ -986,19 +993,19 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) nr_make->vr = 0; nr_make->vl = 0; nr_make->state = NR_STATE_3; - sk->sk_ack_backlog++; - - nr_insert_socket(make); - + sk_acceptq_added(sk); skb_queue_head(&sk->sk_receive_queue, skb); - nr_start_heartbeat(make); - nr_start_idletimer(make); - if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb->len); bh_unlock_sock(sk); + + nr_insert_socket(make); + + nr_start_heartbeat(make); + nr_start_idletimer(make); + return 1; } @@ -1383,14 +1390,12 @@ static int __init nr_proto_init(void) return -1; } - dev_nr = kmalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL); + dev_nr = kzalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_nr == NULL) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); return -1; } - memset(dev_nr, 0x00, nr_ndevs * sizeof(struct net_device *)); - for (i = 0; i < nr_ndevs; i++) { char name[IFNAMSIZ]; struct net_device *dev; @@ -1408,6 +1413,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } + lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key); dev_nr[i] = dev; } diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 621e5586ab03..9b8eb54971ab 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -6,7 +6,6 @@ * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b3b9097c87c7..c11737f472d6 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -725,15 +725,17 @@ void nr_link_failed(ax25_cb *ax25, int reason) struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) + nr_neigh_for_each(s, node, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; break; } + } spin_unlock_bh(&nr_neigh_list_lock); - if (nr_neigh == NULL) return; + if (nr_neigh == NULL) + return; nr_neigh->ax25 = NULL; ax25_cb_put(ax25); @@ -743,11 +745,13 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, node, &nr_node_list) { nr_node_lock(nr_node); - if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) + if (nr_node->which < nr_node->count && + nr_node->routes[nr_node->which].neighbour == nr_neigh) nr_node->which++; nr_node_unlock(nr_node); + } spin_unlock_bh(&nr_node_list_lock); nr_neigh_put(nr_neigh); } diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 75b72d389ba9..ddba1c144260 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -138,8 +138,8 @@ static void nr_heartbeat_expiry(unsigned long param) if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { sock_hold(sk); - nr_destroy_socket(sk); bh_unlock_sock(sk); + nr_destroy_socket(sk); sock_put(sk); return; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9db7dbdb16e6..f4ccb90e6739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,7 +49,6 @@ * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> @@ -428,21 +427,24 @@ out_unlock: } #endif -static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) +static inline int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned *snaplen) { struct sk_filter *filter; + int err = 0; - bh_lock_sock(sk); - filter = sk->sk_filter; - /* - * Our caller already checked that filter != NULL but we need to - * verify that under bh_lock_sock() to be safe - */ - if (likely(filter != NULL)) - res = sk_run_filter(skb, filter->insns, filter->len); - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) { + err = sk_run_filter(skb, filter->insns, filter->len); + if (!err) + err = -EPERM; + else if (*snaplen > err) + *snaplen = err; + } + rcu_read_unlock_bh(); - return res; + return err; } /* @@ -492,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -587,20 +584,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; } } snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; @@ -627,8 +619,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe if ((int)snaplen < 0) snaplen = 0; } - if (snaplen > skb->len-skb->data_len) - snaplen = skb->len-skb->data_len; spin_lock(&sk->sk_receive_queue.lock); h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head); @@ -645,7 +635,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); - memcpy((u8*)h + macoff, skb->data, snaplen); + skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); h->tp_len = skb->len; h->tp_snaplen = snaplen; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 55564efccf11..08a542855654 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -10,7 +10,6 @@ * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -68,6 +67,14 @@ static struct proto_ops rose_proto_ops; ax25_address rose_callsign; /* + * ROSE network devices are virtual network devices encapsulating ROSE + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key rose_netdev_xmit_lock_key; + +/* * Convert a ROSE address into text. */ const char *rose2asc(const rose_address *addr) @@ -753,7 +760,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose_insert_socket(sk); /* Finish the bind */ } - +rose_try_next_neigh: rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; @@ -811,6 +818,11 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le } if (sk->sk_state != TCP_ESTABLISHED) { + /* Try next neighbour */ + rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic); + if (rose->neighbour) + goto rose_try_next_neigh; + /* No more neighbour */ sock->state = SS_UNCONNECTED; return sock_error(sk); /* Always set at this point */ } @@ -1486,14 +1498,13 @@ static int __init rose_proto_init(void) rose_callsign = null_ax25_address; - dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); + dev_rose = kzalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); rc = -ENOMEM; goto out_proto_unregister; } - memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*)); for (i = 0; i < rose_ndevs; i++) { struct net_device *dev; char name[IFNAMSIZ]; @@ -1512,6 +1523,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } + lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key); dev_rose[i] = dev; } diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 2a1bf8e119e5..7c279e2659ec 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -60,6 +59,7 @@ static int rose_rebuild_header(struct sk_buff *skb) struct net_device_stats *stats = netdev_priv(dev); unsigned char *bp = (unsigned char *)skb->data; struct sk_buff *skbn; + unsigned int len; #ifdef CONFIG_INET if (arp_find(bp + 7, skb)) { @@ -76,6 +76,8 @@ static int rose_rebuild_header(struct sk_buff *skb) kfree_skb(skb); + len = skbn->len; + if (!rose_route_frame(skbn, NULL)) { kfree_skb(skbn); stats->tx_errors++; @@ -83,7 +85,7 @@ static int rose_rebuild_header(struct sk_buff *skb) } stats->tx_packets++; - stats->tx_bytes += skbn->len; + stats->tx_bytes += len; #endif return 1; } diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index c4aeb7d40266..d07122b57e0d 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1098,8 +1098,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call, call->app_ready_seq = pmsg->seq; call->app_ready_qty += pmsg->dsize; - list_del_init(&pmsg->link); - list_add_tail(&pmsg->link, &call->app_readyq); + list_move_tail(&pmsg->link, &call->app_readyq); } /* see if we've got the last packet yet */ diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index 0e0a4553499f..93d2c55ad2d5 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -58,13 +58,12 @@ static inline int __rxrpc_create_connection(struct rxrpc_peer *peer, _enter("%p",peer); /* allocate and initialise a connection record */ - conn = kmalloc(sizeof(struct rxrpc_connection), GFP_KERNEL); + conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL); if (!conn) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(conn, 0, sizeof(struct rxrpc_connection)); atomic_set(&conn->usage, 1); INIT_LIST_HEAD(&conn->link); @@ -402,8 +401,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) /* move to graveyard queue */ _debug("burying connection: {%08x}", ntohl(conn->conn_id)); - list_del(&conn->link); - list_add_tail(&conn->link, &peer->conn_graveyard); + list_move_tail(&conn->link, &peer->conn_graveyard); rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ); @@ -536,13 +534,12 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn, return -EINVAL; } - msg = kmalloc(sizeof(struct rxrpc_message), alloc_flags); + msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags); if (!msg) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(msg, 0, sizeof(*msg)); atomic_set(&msg->usage, 1); INIT_LIST_HEAD(&msg->link); diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index 1aadd026d354..cea4eb5e2497 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -160,8 +160,7 @@ void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans) list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) { msg = list_entry(_p, struct rxrpc_message, link); if (msg->trans == trans) { - list_del(&msg->link); - list_add_tail(&msg->link, &tmp); + list_move_tail(&msg->link, &tmp); atomic_dec(&rxrpc_krxsecd_qcount); } } diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c index ed38f5b17c1b..8a275157a3bb 100644 --- a/net/rxrpc/peer.c +++ b/net/rxrpc/peer.c @@ -58,13 +58,12 @@ static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr, _enter("%p,%08x", trans, ntohl(addr)); /* allocate and initialise a peer record */ - peer = kmalloc(sizeof(struct rxrpc_peer), GFP_KERNEL); + peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL); if (!peer) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(peer, 0, sizeof(struct rxrpc_peer)); atomic_set(&peer->usage, 1); INIT_LIST_HEAD(&peer->link); diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c index 56adf16fed0c..9896fd87a4d4 100644 --- a/net/rxrpc/rxrpc_syms.c +++ b/net/rxrpc/rxrpc_syms.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <rxrpc/transport.h> diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index fbf98729c748..6374df7e77d1 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/module.h> diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index dbe6105e83a5..465efc86fccf 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c @@ -68,11 +68,10 @@ int rxrpc_create_transport(unsigned short port, _enter("%hu", port); - trans = kmalloc(sizeof(struct rxrpc_transport), GFP_KERNEL); + trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL); if (!trans) return -ENOMEM; - memset(trans, 0, sizeof(struct rxrpc_transport)); atomic_set(&trans->usage, 1); INIT_LIST_HEAD(&trans->services); INIT_LIST_HEAD(&trans->link); @@ -312,13 +311,12 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans, _enter(""); - msg = kmalloc(sizeof(struct rxrpc_message), GFP_KERNEL); + msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL); if (!msg) { _leave(" = -ENOMEM"); return -ENOMEM; } - memset(msg, 0, sizeof(*msg)); atomic_set(&msg->usage, 1); list_add_tail(&msg->link,msgq); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 13eeee582886..8298ea9ffe19 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -305,7 +305,7 @@ config NET_CLS_U32 tristate "Universal 32bit comparisons w/ hashing (U32)" select NET_CLS ---help--- - Say Y here to be able to classify packetes using a universal + Say Y here to be able to classify packets using a universal 32bit pieces based comparison scheme. To compile this code as a module, choose M here: the @@ -485,7 +485,7 @@ config NET_ACT_IPT tristate "IPtables targets" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES ---help--- - Say Y here to be able to invoke iptables targets after succesful + Say Y here to be able to invoke iptables targets after successful classification. To compile this code as a module, choose M here: the @@ -537,8 +537,8 @@ config NET_ESTIMATOR ---help--- Say Y here to allow using rate estimators to estimate the current rate-of-flow for network devices, queues, etc. This module is - automaticaly selected if needed but can be selected manually for - statstical purposes. + automatically selected if needed but can be selected manually for + statistical purposes. endif # NET_SCHED diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2ffa11c6e8de..835070e9169c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -34,16 +33,230 @@ #include <net/sch_generic.h> #include <net/act_api.h> -#if 0 /* control */ -#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define DPRINTK(format, args...) +void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + struct tcf_common **p1p; + + for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == p) { + write_lock_bh(hinfo->lock); + *p1p = p->tcfc_next; + write_unlock_bh(hinfo->lock); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&p->tcfc_bstats, + &p->tcfc_rate_est); #endif -#if 0 /* data */ -#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define D2PRINTK(format, args...) + kfree(p); + return; + } + } + BUG_TRAP(0); +} +EXPORT_SYMBOL(tcf_hash_destroy); + +int tcf_hash_release(struct tcf_common *p, int bind, + struct tcf_hashinfo *hinfo) +{ + int ret = 0; + + if (p) { + if (bind) + p->tcfc_bindcnt--; + + p->tcfc_refcnt--; + if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { + tcf_hash_destroy(p, hinfo); + ret = 1; + } + } + return ret; +} +EXPORT_SYMBOL(tcf_hash_release); + +static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, + struct tc_action *a, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; + struct rtattr *r ; + + read_lock(hinfo->lock); + + s_i = cb->args[0]; + + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + for (; p; p = p->tcfc_next) { + index++; + if (index < s_i) + continue; + a->priv = p; + a->order = n_i; + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + err = tcf_action_dump_1(skb, a, 0, 0); + if (err < 0) { + index--; + skb_trim(skb, (u8*)r - skb->data); + goto done; + } + r->rta_len = skb->tail - (u8*)r; + n_i++; + if (n_i >= TCA_ACT_MAX_PRIO) + goto done; + } + } +done: + read_unlock(hinfo->lock); + if (n_i) + cb->args[0] += n_i; + return n_i; + +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + goto done; +} + +static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p, *s_p; + struct rtattr *r ; + int i= 0, n_i = 0; + + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + while (p != NULL) { + s_p = p->tcfc_next; + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + module_put(a->ops->owner); + n_i++; + p = s_p; + } + } + RTA_PUT(skb, TCA_FCNT, 4, &n_i); + r->rta_len = skb->tail - (u8*)r; + + return n_i; +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + return -EINVAL; +} + +int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + + if (type == RTM_DELACTION) { + return tcf_del_walker(skb, a, hinfo); + } else if (type == RTM_GETACTION) { + return tcf_dump_walker(skb, cb, a, hinfo); + } else { + printk("tcf_generic_walker: unknown action %d\n", type); + return -EINVAL; + } +} +EXPORT_SYMBOL(tcf_generic_walker); + +struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} +EXPORT_SYMBOL(tcf_hash_lookup); + +u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_hash_lookup(val, hinfo)); + + return (*idx_gen = val); +} +EXPORT_SYMBOL(tcf_hash_new_index); + +int tcf_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct tcf_common *p = tcf_hash_lookup(index, hinfo); + + if (p) { + a->priv = p; + return 1; + } + return 0; +} +EXPORT_SYMBOL(tcf_hash_search); + +struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = NULL; + if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (bind) { + p->tcfc_bindcnt++; + p->tcfc_refcnt++; + } + a->priv = p; + } + return p; +} +EXPORT_SYMBOL(tcf_hash_check); + +struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = kzalloc(size, GFP_KERNEL); + + if (unlikely(!p)) + return p; + p->tcfc_refcnt = 1; + if (bind) + p->tcfc_bindcnt = 1; + + spin_lock_init(&p->tcfc_lock); + p->tcfc_stats_lock = &p->tcfc_lock; + p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); + p->tcfc_tm.install = jiffies; + p->tcfc_tm.lastuse = jiffies; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + p->tcfc_stats_lock, est); #endif + a->priv = (void *) p; + return p; +} +EXPORT_SYMBOL(tcf_hash_create); + +void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + + write_lock_bh(hinfo->lock); + p->tcfc_next = hinfo->htab[h]; + hinfo->htab[h] = p; + write_unlock_bh(hinfo->lock); +} +EXPORT_SYMBOL(tcf_hash_insert); static struct tc_action_ops *act_base = NULL; static DEFINE_RWLOCK(act_mod_lock); @@ -156,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", - skb, skb->input_dev ? skb->input_dev->name : "xxx", - skb->dev->name); ret = TC_ACT_OK; goto exec_done; } @@ -188,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind) for (a = act; a; a = act) { if (a->ops && a->ops->cleanup) { - DPRINTK("tcf_action_destroy destroying %p next %p\n", - a, a->next); if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -251,15 +459,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, bind, ref); if (err < 0) - goto rtattr_failure; + goto errout; r->rta_len = skb->tail - (u8*)r; } return 0; rtattr_failure: + err = -EINVAL; +errout: skb_trim(skb, b - skb->data); - return -err; + return err; } struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, @@ -306,14 +516,14 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, goto err_mod; } #endif + *err = -ENOENT; goto err_out; } *err = -ENOMEM; - a = kmalloc(sizeof(*a), GFP_KERNEL); + a = kzalloc(sizeof(*a), GFP_KERNEL); if (a == NULL) goto err_mod; - memset(a, 0, sizeof(*a)); /* backward compatibility for policer */ if (name == NULL) @@ -330,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, if (*err != ACT_P_CREATED) module_put(a_o->owner); a->ops = a_o; - DPRINTK("tcf_action_init_1: successfull %s\n", act_name); *err = 0; return a; @@ -391,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->stats_lock, &d); + TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->stats_lock, &d); + h->tcf_stats_lock, &d); if (err < 0) goto errout; @@ -405,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (a->ops->get_stats(skb, a) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &h->qstats) < 0) + gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) @@ -458,7 +667,6 @@ static int act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; - int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -467,10 +675,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) kfree_skb(skb); return -EINVAL; } - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - if (err > 0) - err = 0; - return err; + + return rtnl_unicast(skb, pid); } static struct tc_action * @@ -490,10 +696,9 @@ tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err) index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]); *err = -ENOMEM; - a = kmalloc(sizeof(struct tc_action), GFP_KERNEL); + a = kzalloc(sizeof(struct tc_action), GFP_KERNEL); if (a == NULL) return NULL; - memset(a, 0, sizeof(struct tc_action)); *err = -EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]); @@ -529,12 +734,11 @@ static struct tc_action *create_a(int i) { struct tc_action *act; - act = kmalloc(sizeof(*act), GFP_KERNEL); + act = kzalloc(sizeof(*act), GFP_KERNEL); if (act == NULL) { printk("create_a: failed to alloc!\n"); return NULL; } - memset(act, 0, sizeof(*act)); act->order = i; return act; } @@ -600,8 +804,8 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) return err; rtattr_failure: - module_put(a->ops->owner); nlmsg_failure: + module_put(a->ops->owner); err_out: kfree_skb(skb); kfree(a); @@ -777,7 +981,7 @@ replay: return ret; } -static char * +static struct rtattr * find_dump_kind(struct nlmsghdr *n) { struct rtattr *tb1, *tb2[TCA_ACT_MAX+1]; @@ -805,7 +1009,7 @@ find_dump_kind(struct nlmsghdr *n) return NULL; kind = tb2[TCA_ACT_KIND-1]; - return (char *) RTA_DATA(kind); + return kind; } static int @@ -818,16 +1022,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action a; int ret = 0; struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); - char *kind = find_dump_kind(cb->nlh); + struct rtattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; } - a_o = tc_lookup_action_n(kind); + a_o = tc_lookup_action(kind); if (a_o == NULL) { - printk("failed to find %s\n", kind); return 0; } @@ -835,7 +1038,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) a.ops = a_o; if (a_o->walk == NULL) { - printk("tc_dump_action: %s !capable of dumping table\n", kind); + printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); goto rtattr_failure; } @@ -883,8 +1086,6 @@ static int __init tc_action_init(void) link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; } - printk("TC classifier action (bugs to netdev@vger.kernel.org cc " - "hadi@cyberus.ca)\n"); return 0; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a1e68f78dcc2..6cff56696a81 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -13,7 +13,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -35,48 +34,43 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 - -static u32 idx_gen; -static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE]; +#define GACT_TAB_MASK 15 +static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; +static u32 gact_idx_gen; static DEFINE_RWLOCK(gact_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_gact -#define tc_st tc_gact -#define tcf_t_lock gact_lock -#define tcf_ht tcf_gact_ht - -#define CONFIG_NET_ACT_INIT 1 -#include <net/pkt_act.h> +static struct tcf_hashinfo gact_hash_info = { + .htab = tcf_gact_ht, + .hmask = GACT_TAB_MASK, + .lock = &gact_lock, +}; #ifdef CONFIG_GACT_PROB -static int gact_net_rand(struct tcf_gact *p) +static int gact_net_rand(struct tcf_gact *gact) { - if (net_random()%p->pval) - return p->action; - return p->paction; + if (net_random() % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -static int gact_determ(struct tcf_gact *p) +static int gact_determ(struct tcf_gact *gact) { - if (p->bstats.packets%p->pval) - return p->action; - return p->paction; + if (gact->tcf_bstats.packets % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -typedef int (*g_rand)(struct tcf_gact *p); +typedef int (*g_rand)(struct tcf_gact *gact); static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; -#endif +#endif /* CONFIG_GACT_PROB */ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm; - struct tcf_gact *p; + struct tcf_gact *gact; + struct tcf_common *pc; int ret = 0; if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) @@ -95,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, return -EOPNOTSUPP; #endif - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), + bind, &gact_idx_gen, &gact_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &gact_hash_info); return -EEXIST; } } - spin_lock_bh(&p->lock); - p->action = parm->action; + gact = to_gact(pc); + + spin_lock_bh(&gact->tcf_lock); + gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB-1] != NULL) { struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); - p->paction = p_parm->paction; - p->pval = p_parm->pval; - p->ptype = p_parm->ptype; + gact->tcfg_paction = p_parm->paction; + gact->tcfg_pval = p_parm->pval; + gact->tcfg_ptype = p_parm->ptype; } #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &gact_hash_info); return ret; } -static int -tcf_gact_cleanup(struct tc_action *a, int bind) +static int tcf_gact_cleanup(struct tc_action *a, int bind) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; - if (p != NULL) - return tcf_hash_release(p, bind); + if (gact) + return tcf_hash_release(&gact->common, bind, &gact_hash_info); return 0; } -static int -tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; int action = TC_ACT_SHOT; - spin_lock(&p->lock); + spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (p->ptype && gact_rand[p->ptype] != NULL) - action = gact_rand[p->ptype](p); + if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + action = gact_rand[gact->tcfg_ptype](gact); else - action = p->action; + action = gact->tcf_action; #else - action = p->action; + action = gact->tcf_action; #endif - p->bstats.bytes += skb->len; - p->bstats.packets++; + gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) - p->qstats.drops++; - p->tm.lastuse = jiffies; - spin_unlock(&p->lock); + gact->tcf_qstats.drops++; + gact->tcf_tm.lastuse = jiffies; + spin_unlock(&gact->tcf_lock); return action; } -static int -tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; struct tcf_t t; - opt.index = p->index; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.action = p->action; + opt.index = gact->tcf_index; + opt.refcnt = gact->tcf_refcnt - ref; + opt.bindcnt = gact->tcf_bindcnt - bind; + opt.action = gact->tcf_action; RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB - if (p->ptype) { + if (gact->tcfg_ptype) { struct tc_gact_p p_opt; - p_opt.paction = p->paction; - p_opt.pval = p->pval; - p_opt.ptype = p->ptype; + p_opt.paction = gact->tcfg_paction; + p_opt.pval = gact->tcfg_pval; + p_opt.ptype = gact->tcfg_ptype; RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_gact_ops = { .kind = "gact", + .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -209,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Classifier actions"); MODULE_LICENSE("GPL"); -static int __init -gact_init_module(void) +static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB printk("GACT probability on\n"); @@ -220,8 +214,7 @@ gact_init_module(void) return tcf_register_action(&act_gact_ops); } -static void __exit -gact_cleanup_module(void) +static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 37640c6fc014..d8c9310da6e5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -39,25 +38,19 @@ #include <linux/netfilter_ipv4/ip_tables.h> -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; -/* ipt hash table lock */ +#define IPT_TAB_MASK 15 +static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; +static u32 ipt_idx_gen; static DEFINE_RWLOCK(ipt_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_ipt -#define tcf_t_lock ipt_lock -#define tcf_ht tcf_ipt_ht - -#define CONFIG_NET_ACT_INIT -#include <net/pkt_act.h> +static struct tcf_hashinfo ipt_hash_info = { + .htab = tcf_ipt_ht, + .hmask = IPT_TAB_MASK, + .lock = &ipt_lock, +}; -static int -ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) +static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { struct ipt_target *target; int ret = 0; @@ -66,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) if (!target) return -ENOENT; - DPRINTK("ipt_init_target: found %s\n", target->name); t->u.kernel.target = target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), @@ -77,10 +69,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(table, NULL, t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t), hook)) { - DPRINTK("ipt_init_target: check failed for `%s'.\n", - t->u.kernel.target->name); module_put(t->u.kernel.target->me); ret = -EINVAL; } @@ -88,40 +77,37 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) return ret; } -static void -ipt_destroy_target(struct ipt_entry_target *t) +static void ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); } -static int -tcf_ipt_release(struct tcf_ipt *p, int bind) +static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) { int ret = 0; - if (p) { + if (ipt) { if (bind) - p->bindcnt--; - p->refcnt--; - if (p->bindcnt <= 0 && p->refcnt <= 0) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); - tcf_hash_destroy(p); + ipt->tcf_bindcnt--; + ipt->tcf_refcnt--; + if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); + tcf_hash_destroy(&ipt->common, &ipt_hash_info); ret = ACT_P_DELETED; } } return ret; } -static int -tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_IPT_MAX]; - struct tcf_ipt *p; + struct tcf_ipt *ipt; + struct tcf_common *pc; struct ipt_entry_target *td, *t; char *tname; int ret = 0, err; @@ -145,49 +131,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); - p = tcf_hash_check(index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(index, a, bind, &ipt_hash_info); + if (!pc) { + pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, + &ipt_idx_gen, &ipt_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_ipt_release(p, bind); + tcf_ipt_release(to_ipt(pc), bind); return -EEXIST; } } + ipt = to_ipt(pc); hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); err = -ENOMEM; tname = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (tname == NULL) + if (unlikely(!tname)) goto err1; if (tb[TCA_IPT_TABLE - 1] == NULL || rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); t = kmalloc(td->u.target_size, GFP_KERNEL); - if (t == NULL) + if (unlikely(!t)) goto err2; memcpy(t, td, td->u.target_size); if ((err = ipt_init_target(t, tname, hook)) < 0) goto err3; - spin_lock_bh(&p->lock); + spin_lock_bh(&ipt->tcf_lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); } - p->tname = tname; - p->t = t; - p->hook = hook; - spin_unlock_bh(&p->lock); + ipt->tcfi_tname = tname; + ipt->tcfi_t = t; + ipt->tcfi_hook = hook; + spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &ipt_hash_info); return ret; err3: @@ -195,33 +183,32 @@ err3: err2: kfree(tname); err1: - kfree(p); + kfree(pc); return err; } -static int -tcf_ipt_cleanup(struct tc_action *a, int bind) +static int tcf_ipt_cleanup(struct tc_action *a, int bind) { - struct tcf_ipt *p = PRIV(a, ipt); - return tcf_ipt_release(p, bind); + struct tcf_ipt *ipt = a->priv; + return tcf_ipt_release(ipt, bind); } -static int -tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { int ret = 0, result = 0; - struct tcf_ipt *p = PRIV(a, ipt); + struct tcf_ipt *ipt = a->priv; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return TC_ACT_UNSPEC; } - spin_lock(&p->lock); + spin_lock(&ipt->tcf_lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + ipt->tcf_tm.lastuse = jiffies; + ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed @@ -230,16 +217,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) /* iptables targets take a double skb pointer in case the skb * needs to be replaced. We don't own the skb, so this must not * happen. The pskb_expand_head above should make sure of this */ - ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, - p->t->u.kernel.target, p->t->data, - NULL); + ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ipt->tcfi_hook, + ipt->tcfi_t->u.kernel.target, + ipt->tcfi_t->data); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; break; case NF_DROP: result = TC_ACT_SHOT; - p->qstats.drops++; + ipt->tcf_qstats.drops++; break; case IPT_CONTINUE: result = TC_ACT_PIPE; @@ -250,53 +238,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) result = TC_POLICE_OK; break; } - spin_unlock(&p->lock); + spin_unlock(&ipt->tcf_lock); return result; } -static int -tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { + unsigned char *b = skb->tail; + struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; struct tc_cnt c; - unsigned char *b = skb->tail; - struct tcf_ipt *p = PRIV(a, ipt); /* for simple targets kernel size == user size ** user name = target name ** for foolproof you need to not assume this */ - t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); - if (t == NULL) + t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); + if (unlikely(!t)) goto rtattr_failure; - c.bindcnt = p->bindcnt - bind; - c.refcnt = p->refcnt - ref; - memcpy(t, p->t, p->t->u.user.target_size); - strcpy(t->u.user.name, p->t->u.kernel.target->name); - - DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, - strlen(p->tname)); - DPRINTK("\tdump target name %s size %d size user %d " - "data[0] %x data[1] %x\n", p->t->u.kernel.target->name, - p->t->u.target_size, p->t->u.user.target_size, - p->t->data[0], p->t->data[1]); - RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t); - RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index); - RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook); + c.bindcnt = ipt->tcf_bindcnt - bind; + c.refcnt = ipt->tcf_refcnt - ref; + memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); + strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); + + RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); + RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); + RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); - RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); - tm.install = jiffies_to_clock_t(jiffies - p->tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - tm.expires = jiffies_to_clock_t(p->tm.expires); + RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); + tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); kfree(t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); kfree(t); return -1; @@ -304,6 +285,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", + .hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -319,14 +301,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Iptables target actions"); MODULE_LICENSE("GPL"); -static int __init -ipt_init_module(void) +static int __init ipt_init_module(void) { return tcf_register_action(&act_ipt_ops); } -static void __exit -ipt_cleanup_module(void) +static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_ipt_ops); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 4fcccbd50885..483897271f15 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -15,7 +15,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -40,46 +39,39 @@ #include <linux/etherdevice.h> #include <linux/if_arp.h> - -/* use generic hash table */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE]; +#define MIRRED_TAB_MASK 7 +static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; +static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_mirred -#define tc_st tc_mirred -#define tcf_t_lock mirred_lock -#define tcf_ht tcf_mirred_ht - -#define CONFIG_NET_ACT_INIT 1 -#include <net/pkt_act.h> +static struct tcf_hashinfo mirred_hash_info = { + .htab = tcf_mirred_ht, + .hmask = MIRRED_TAB_MASK, + .lock = &mirred_lock, +}; -static inline int -tcf_mirred_release(struct tcf_mirred *p, int bind) +static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) { - if (p) { + if (m) { if (bind) - p->bindcnt--; - p->refcnt--; - if(!p->bindcnt && p->refcnt <= 0) { - dev_put(p->dev); - tcf_hash_destroy(p); + m->tcf_bindcnt--; + m->tcf_refcnt--; + if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { + dev_put(m->tcfm_dev); + tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } } return 0; } -static int -tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_MIRRED_MAX]; struct tc_mirred *parm; - struct tcf_mirred *p; + struct tcf_mirred *m; + struct tcf_common *pc; struct net_device *dev = NULL; int ret = 0; int ok_push = 0; @@ -111,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, } } - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); + if (!pc) { if (!parm->ifindex) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, + &mirred_idx_gen, &mirred_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_mirred_release(p, bind); + tcf_mirred_release(to_mirred(pc), bind); return -EEXIST; } } + m = to_mirred(pc); - spin_lock_bh(&p->lock); - p->action = parm->action; - p->eaction = parm->eaction; + spin_lock_bh(&m->tcf_lock); + m->tcf_action = parm->action; + m->tcfm_eaction = parm->eaction; if (parm->ifindex) { - p->ifindex = parm->ifindex; + m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(p->dev); - p->dev = dev; + dev_put(m->tcfm_dev); + m->tcfm_dev = dev; dev_hold(dev); - p->ok_push = ok_push; + m->tcfm_ok_push = ok_push; } - spin_unlock_bh(&p->lock); + spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &mirred_hash_info); - DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s " - "ifindex %d\n", parm->index, parm->action, parm->eaction, - dev->name, parm->ifindex); return ret; } -static int -tcf_mirred_cleanup(struct tc_action *a, int bind) +static int tcf_mirred_cleanup(struct tc_action *a, int bind) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; - if (p != NULL) - return tcf_mirred_release(p, bind); + if (m) + return tcf_mirred_release(m, bind); return 0; } -static int -tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2 = NULL; u32 at = G_TC_AT(skb->tc_verd); - spin_lock(&p->lock); + spin_lock(&m->tcf_lock); - dev = p->dev; - p->tm.lastuse = jiffies; + dev = m->tcfm_dev; + m->tcf_tm.lastuse = jiffies; if (!(dev->flags&IFF_UP) ) { if (net_ratelimit()) @@ -177,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) bad_mirred: if (skb2 != NULL) kfree_skb(skb2); - p->qstats.overlimits++; - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); + m->tcf_qstats.overlimits++; + m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.packets++; + spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -190,59 +180,59 @@ bad_mirred: skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto bad_mirred; - if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR && + m->tcfm_eaction != TCA_EGRESS_REDIR) { if (net_ratelimit()) - printk("tcf_mirred unknown action %d\n", p->eaction); + printk("tcf_mirred unknown action %d\n", + m->tcfm_eaction); goto bad_mirred; } - p->bstats.bytes += skb2->len; - p->bstats.packets++; + m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) - if (p->ok_push) + if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); /* mirror is always swallowed */ - if (p->eaction != TCA_EGRESS_MIRROR) + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; skb2->input_dev = skb->dev; dev_queue_xmit(skb2); - spin_unlock(&p->lock); - return p->action; + spin_unlock(&m->tcf_lock); + return m->tcf_action; } -static int -tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_mirred *m = a->priv; struct tc_mirred opt; - struct tcf_mirred *p = PRIV(a, mirred); struct tcf_t t; - opt.index = p->index; - opt.action = p->action; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.eaction = p->eaction; - opt.ifindex = p->ifindex; - DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n", - p->index, p->action, p->eaction, p->ifindex); + opt.index = m->tcf_index; + opt.action = m->tcf_action; + opt.refcnt = m->tcf_refcnt - ref; + opt.bindcnt = m->tcf_bindcnt - bind; + opt.eaction = m->tcfm_eaction; + opt.ifindex = m->tcfm_ifindex; RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(m->tcf_tm.expires); RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", + .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -258,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); -static int __init -mirred_init_module(void) +static int __init mirred_init_module(void) { printk("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } -static void __exit -mirred_cleanup_module(void) +static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 1742a68e0122..8ac65c219b98 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -34,32 +33,25 @@ #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> - -#define PEDIT_DEB 1 - -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE]; +#define PEDIT_TAB_MASK 15 +static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; +static u32 pedit_idx_gen; static DEFINE_RWLOCK(pedit_lock); -#define tcf_st tcf_pedit -#define tc_st tc_pedit -#define tcf_t_lock pedit_lock -#define tcf_ht tcf_pedit_ht - -#define CONFIG_NET_ACT_INIT 1 -#include <net/pkt_act.h> +static struct tcf_hashinfo pedit_hash_info = { + .htab = tcf_pedit_ht, + .hmask = PEDIT_TAB_MASK, + .lock = &pedit_lock, +}; -static int -tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_PEDIT_MAX]; struct tc_pedit *parm; int ret = 0; struct tcf_pedit *p; + struct tcf_common *pc; struct tc_pedit_key *keys = NULL; int ksize; @@ -74,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) return -EINVAL; - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); + if (!pc) { if (!parm->nkeys) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &pedit_idx_gen, &pedit_hash_info); + if (unlikely(!pc)) return -ENOMEM; + p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(p); + kfree(pc); return -ENOMEM; } ret = ACT_P_CREATED; } else { + p = to_pedit(pc); if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &pedit_hash_info); return -EEXIST; } - if (p->nkeys && p->nkeys != parm->nkeys) { + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) return -ENOMEM; } } - spin_lock_bh(&p->lock); - p->flags = parm->flags; - p->action = parm->action; + spin_lock_bh(&p->tcf_lock); + p->tcfp_flags = parm->flags; + p->tcf_action = parm->action; if (keys) { - kfree(p->keys); - p->keys = keys; - p->nkeys = parm->nkeys; + kfree(p->tcfp_keys); + p->tcfp_keys = keys; + p->tcfp_nkeys = parm->nkeys; } - memcpy(p->keys, parm->keys, ksize); - spin_unlock_bh(&p->lock); + memcpy(p->tcfp_keys, parm->keys, ksize); + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &pedit_hash_info); return ret; } -static int -tcf_pedit_cleanup(struct tc_action *a, int bind) +static int tcf_pedit_cleanup(struct tc_action *a, int bind) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; - if (p != NULL) { - struct tc_pedit_key *keys = p->keys; - if (tcf_hash_release(p, bind)) { + if (p) { + struct tc_pedit_key *keys = p->tcfp_keys; + if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { kfree(keys); return 1; } @@ -129,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) return 0; } -static int -tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; int i, munged = 0; u8 *pptr; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - return p->action; + return p->tcf_action; } } pptr = skb->nh.raw; - spin_lock(&p->lock); + spin_lock(&p->tcf_lock); - p->tm.lastuse = jiffies; + p->tcf_tm.lastuse = jiffies; - if (p->nkeys > 0) { - struct tc_pedit_key *tkey = p->keys; + if (p->tcfp_nkeys > 0) { + struct tc_pedit_key *tkey = p->tcfp_keys; - for (i = p->nkeys; i > 0; i--, tkey++) { + for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr; int offset = tkey->off; @@ -170,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) printk("offset must be on 32 bit boundaries\n"); goto bad; } - if (skb->len < 0 || (offset > 0 && offset > skb->len)) { + if (skb->len < 0 || + (offset > 0 && offset > skb->len)) { printk("offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; @@ -186,64 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else { - printk("pedit BUG: index %d\n",p->index); + printk("pedit BUG: index %d\n", p->tcf_index); } bad: - p->qstats.overlimits++; + p->tcf_qstats.overlimits++; done: - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); - return p->action; + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + spin_unlock(&p->tcf_lock); + return p->tcf_action; } -static int -tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) +static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_pedit *p = a->priv; struct tc_pedit *opt; - struct tcf_pedit *p = PRIV(a, pedit); struct tcf_t t; int s; - s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); + s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); /* netlink spinlocks held above us - must use ATOMIC */ - opt = kmalloc(s, GFP_ATOMIC); - if (opt == NULL) + opt = kzalloc(s, GFP_ATOMIC); + if (unlikely(!opt)) return -ENOBUFS; - memset(opt, 0, s); - - memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); - opt->index = p->index; - opt->nkeys = p->nkeys; - opt->flags = p->flags; - opt->action = p->action; - opt->refcnt = p->refcnt - ref; - opt->bindcnt = p->bindcnt - bind; - - -#ifdef PEDIT_DEB - { - /* Debug - get rid of later */ - int i; - struct tc_pedit_key *key = opt->keys; - - for (i=0; i<opt->nkeys; i++, key++) { - printk( "\n key #%d",i); - printk( " at %d: val %08x mask %08x", - (unsigned int)key->off, - (unsigned int)key->val, - (unsigned int)key->mask); - } - } -#endif + + memcpy(opt->keys, p->tcfp_keys, + p->tcfp_nkeys * sizeof(struct tc_pedit_key)); + opt->index = p->tcf_index; + opt->nkeys = p->tcfp_nkeys; + opt->flags = p->tcfp_flags; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); kfree(opt); return skb->len; @@ -254,9 +232,9 @@ rtattr_failure: return -1; } -static -struct tc_action_ops act_pedit_ops = { +static struct tc_action_ops act_pedit_ops = { .kind = "pedit", + .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -272,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Packet Editor actions"); MODULE_LICENSE("GPL"); -static int __init -pedit_init_module(void) +static int __init pedit_init_module(void) { return tcf_register_action(&act_pedit_ops); } -static void __exit -pedit_cleanup_module(void) +static void __exit pedit_cleanup_module(void) { tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 24c348fa8922..fed47b658837 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -13,7 +13,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -33,43 +32,27 @@ #include <net/sock.h> #include <net/act_api.h> -#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) -#define PRIV(a) ((struct tcf_police *) (a)->priv) +#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; -/* Policer hash table lock */ +#define POL_TAB_MASK 15 +static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; +static u32 police_idx_gen; static DEFINE_RWLOCK(police_lock); -/* Each policer is serialized by its individual spinlock */ - -static __inline__ unsigned tcf_police_hash(u32 index) -{ - return index&0xF; -} - -static __inline__ struct tcf_police * tcf_police_lookup(u32 index) -{ - struct tcf_police *p; +static struct tcf_hashinfo police_hash_info = { + .htab = tcf_police_ht, + .hmask = POL_TAB_MASK, + .lock = &police_lock, +}; - read_lock(&police_lock); - for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { - if (p->index == index) - break; - } - read_unlock(&police_lock); - return p; -} +/* Each policer is serialized by its individual spinlock */ #ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { - struct tcf_police *p; + struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; @@ -77,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c s_i = cb->args[0]; - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_police_ht[tcf_police_hash(i)]; + for (i = 0; i < (POL_TAB_MASK + 1); i++) { + p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; - for (; p; p = p->next) { + for (; p; p = p->tcfc_next) { index++; if (index < s_i) continue; @@ -111,48 +94,26 @@ rtattr_failure: skb_trim(skb, (u8*)r - skb->data); goto done; } - -static inline int -tcf_act_police_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_police *p = tcf_police_lookup(index); - - if (p != NULL) { - a->priv = p; - return 1; - } else { - return 0; - } -} #endif -static inline u32 tcf_police_new_index(void) -{ - do { - if (++idx_gen == 0) - idx_gen = 1; - } while (tcf_police_lookup(idx_gen)); - - return idx_gen; -} - void tcf_police_destroy(struct tcf_police *p) { - unsigned h = tcf_police_hash(p->index); - struct tcf_police **p1p; + unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); + struct tcf_common **p1p; - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { - if (*p1p == p) { + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == &p->common) { write_lock_bh(&police_lock); - *p1p = p->next; + *p1p = p->tcf_next; write_unlock_bh(&police_lock); #ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->bstats, &p->rate_est); + gen_kill_estimator(&p->tcf_bstats, + &p->tcf_rate_est); #endif - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - if (p->P_tab) - qdisc_put_rtab(p->P_tab); + if (p->tcfp_R_tab) + qdisc_put_rtab(p->tcfp_R_tab); + if (p->tcfp_P_tab) + qdisc_put_rtab(p->tcfp_P_tab); kfree(p); return; } @@ -168,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, int ret = 0, err; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; - struct tcf_police *p; + struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) @@ -186,28 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) return -EINVAL; - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - a->priv = p; - if (bind) { - p->bindcnt += 1; - p->refcnt += 1; + if (parm->index) { + struct tcf_common *pc; + + pc = tcf_hash_lookup(parm->index, &police_hash_info); + if (pc != NULL) { + a->priv = pc; + police = to_police(pc); + if (bind) { + police->tcf_bindcnt += 1; + police->tcf_refcnt += 1; + } + if (ovr) + goto override; + return ret; } - if (ovr) - goto override; - return ret; } - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (police == NULL) return -ENOMEM; - memset(p, 0, sizeof(*p)); - ret = ACT_P_CREATED; - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (bind) - p->bindcnt = 1; + police->tcf_bindcnt = 1; override: if (parm->rate.rate) { err = -ENOMEM; @@ -217,67 +182,71 @@ override: if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) { + if (P_tab == NULL) { qdisc_put_rtab(R_tab); goto failure; } } } /* No failure allowed after this point */ - spin_lock_bh(&p->lock); + spin_lock_bh(&police->tcf_lock); if (R_tab != NULL) { - qdisc_put_rtab(p->R_tab); - p->R_tab = R_tab; + qdisc_put_rtab(police->tcfp_R_tab); + police->tcfp_R_tab = R_tab; } if (P_tab != NULL) { - qdisc_put_rtab(p->P_tab); - p->P_tab = P_tab; + qdisc_put_rtab(police->tcfp_P_tab); + police->tcfp_P_tab = P_tab; } if (tb[TCA_POLICE_RESULT-1]) - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<<p->R_tab->rate.cell_log; + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) - gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) return ret; - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - h = tcf_police_hash(p->index); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_hash_new_index(&police_idx_gen, &police_hash_info); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - a->priv = p; + a->priv = police; return ret; failure: if (ret == ACT_P_CREATED) - kfree(p); + kfree(police); return err; } static int tcf_act_police_cleanup(struct tc_action *a, int bind) { - struct tcf_police *p = PRIV(a); + struct tcf_police *p = a->priv; if (p != NULL) return tcf_police_release(p, bind); @@ -287,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { + struct tcf_police *police = a->priv; psched_time_t now; - struct tcf_police *p = PRIV(a); long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_police *police = a->priv; struct tc_police opt; - struct tcf_police *p = PRIV(a); - - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - if (p->R_tab) - opt.rate = p->R_tab->rate; + + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + opt.refcnt = police->tcf_refcnt - ref; + opt.bindcnt = police->tcf_bindcnt - bind; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -381,13 +351,14 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", + .hinfo = &police_hash_info, .type = TCA_ID_POLICE, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_act_police_hash_search, + .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -409,10 +380,39 @@ module_exit(police_cleanup_module); #else /* CONFIG_NET_CLS_ACT */ -struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) +static struct tcf_common *tcf_police_lookup(u32 index) { - unsigned h; - struct tcf_police *p; + struct tcf_hashinfo *hinfo = &police_hash_info; + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} + +static u32 tcf_police_new_index(void) +{ + u32 *idx_gen = &police_idx_gen; + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_police_lookup(val)); + + return (*idx_gen = val); +} + +struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) +{ + unsigned int h; + struct tcf_police *police; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; @@ -425,150 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - p->refcnt++; - return p; - } + if (parm->index) { + struct tcf_common *pc; - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + pc = tcf_police_lookup(parm->index); + if (pc) { + police = to_police(pc); + police->tcf_refcnt++; + return police; + } + } + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (unlikely(!police)) return NULL; - memset(p, 0, sizeof(*p)); - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { - p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (p->R_tab == NULL) + police->tcfp_R_tab = + qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (police->tcfp_R_tab == NULL) goto failure; if (parm->peakrate.rate) { - p->P_tab = qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) + police->tcfp_P_tab = + qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (police->tcfp_P_tab == NULL) goto failure; } } if (tb[TCA_POLICE_RESULT-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) goto failure; - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } #endif - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<<p->R_tab->rate.cell_log; + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_police_new_index(); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (est) - gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - h = tcf_police_hash(p->index); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - return p; + return police; failure: - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - kfree(p); + if (police->tcfp_R_tab) + qdisc_put_rtab(police->tcfp_R_tab); + kfree(police); return NULL; } -int tcf_police(struct sk_buff *skb, struct tcf_police *p) +int tcf_police(struct sk_buff *skb, struct tcf_police *police) { psched_time_t now; long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } EXPORT_SYMBOL(tcf_police); -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) { - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct tc_police opt; - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - if (p->R_tab) - opt.rate = p->R_tab->rate; + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -577,19 +585,20 @@ rtattr_failure: return -1; } -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) { struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, p->stats_lock, &d) < 0) + TCA_XSTATS, police->tcf_stats_lock, + &d) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &p->qstats) < 0) + gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e5f2e1f431e2..901571a67707 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -10,7 +10,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> @@ -21,54 +20,175 @@ #define TCA_ACT_SIMP 22 -/* XXX: Hide all these common elements under some macro - * probably -*/ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -/* use generic hash table with 8 buckets */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE]; +#define SIMP_TAB_MASK 7 +static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; +static u32 simp_idx_gen; static DEFINE_RWLOCK(simp_lock); -/* override the defaults */ -#define tcf_st tcf_defact -#define tc_st tc_defact -#define tcf_t_lock simp_lock -#define tcf_ht tcf_simp_ht - -#define CONFIG_NET_ACT_INIT 1 -#include <net/pkt_act.h> -#include <net/act_generic.h> +static struct tcf_hashinfo simp_hash_info = { + .htab = tcf_simp_ht, + .hmask = SIMP_TAB_MASK, + .lock = &simp_lock, +}; static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_defact *p = PRIV(a, defact); + struct tcf_defact *d = a->priv; - spin_lock(&p->lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.packets++; /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); - spin_unlock(&p->lock); - return p->action; + printk("simple: %s_%d\n", + (char *)d->tcfd_defdata, d->tcf_bstats.packets); + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static int tcf_simp_release(struct tcf_defact *d, int bind) +{ + int ret = 0; + if (d) { + if (bind) + d->tcf_bindcnt--; + d->tcf_refcnt--; + if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { + kfree(d->tcfd_defdata); + tcf_hash_destroy(&d->common, &simp_hash_info); + ret = 1; + } + } + return ret; +} + +static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); + if (unlikely(!d->tcfd_defdata)) + return -ENOMEM; + d->tcfd_datalen = datalen; + memcpy(d->tcfd_defdata, defdata, datalen); + return 0; +} + +static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + kfree(d->tcfd_defdata); + return alloc_defdata(d, datalen, defdata); +} + +static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_DEF_MAX]; + struct tc_defact *parm; + struct tcf_defact *d; + struct tcf_common *pc; + void *defdata; + u32 datalen = 0; + int ret = 0; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_DEF_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + + parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); + defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); + if (defdata == NULL) + return -EINVAL; + + datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); + if (datalen <= 0) + return -EINVAL; + + pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &simp_idx_gen, &simp_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_defact(pc); + ret = alloc_defdata(d, datalen, defdata); + if (ret < 0) { + kfree(pc); + return ret; + } + ret = ACT_P_CREATED; + } else { + d = to_defact(pc); + if (!ovr) { + tcf_simp_release(d, bind); + return -EEXIST; + } + realloc_defdata(d, datalen, defdata); + } + + spin_lock_bh(&d->tcf_lock); + d->tcf_action = parm->action; + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &simp_hash_info); + return ret; +} + +static inline int tcf_simp_cleanup(struct tc_action *a, int bind) +{ + struct tcf_defact *d = a->priv; + + if (d) + return tcf_simp_release(d, bind); + return 0; +} + +static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb->tail; + struct tcf_defact *d = a->priv; + struct tc_defact opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); + RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static struct tc_action_ops act_simp_ops = { - .kind = "simple", - .type = TCA_ACT_SIMP, - .capab = TCA_CAP_NONE, - .owner = THIS_MODULE, - .act = tcf_simp, - tca_use_default_ops + .kind = "simple", + .hinfo = &simp_hash_info, + .type = TCA_ACT_SIMP, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_simp, + .dump = tcf_simp_dump, + .cleanup = tcf_simp_cleanup, + .init = tcf_simp_init, + .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b4d89fbb3782..37a184021647 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -402,7 +401,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) return skb->len; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); if (!tcm->tcm_parent) q = dev->qdisc_sleeping; else @@ -459,7 +458,7 @@ errout: if (cl) cops->put(q, cl); out: - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); dev_put(dev); return skb->len; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index dfb300bb6baa..09fda68c8b39 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -179,25 +178,23 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, err = -ENOBUFS; if (head == NULL) { - head = kmalloc(sizeof(*head), GFP_KERNEL); + head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) goto errout; - memset(head, 0, sizeof(*head)); INIT_LIST_HEAD(&head->flist); tp->root = head; } - f = kmalloc(sizeof(*f), GFP_KERNEL); + f = kzalloc(sizeof(*f), GFP_KERNEL); if (f == NULL) goto errout; - memset(f, 0, sizeof(*f)); err = -EINVAL; if (handle) f->handle = handle; else { - int i = 0x80000000; + unsigned int i = 0x80000000; do { if (++head->hgenerator == 0x7FFFFFFF) head->hgenerator = 1; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 75470486e405..e54acc6bcccd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -18,7 +18,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -51,6 +50,7 @@ struct fw_head { struct fw_filter *ht[HTSIZE]; + u32 mask; }; struct fw_filter @@ -102,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb->nfmark; + u32 id = skb->nfmark & head->mask; #else u32 id = 0; #endif @@ -210,7 +210,9 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { + struct fw_head *head = (struct fw_head *)tp->root; struct tcf_exts e; + u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); @@ -233,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + goto errout; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + if (mask != head->mask) + goto errout; + } else if (head->mask != 0xFFFFFFFF) + goto errout; + tcf_exts_change(tp, &f->exts, &e); return 0; @@ -268,20 +279,26 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, return -EINVAL; if (head == NULL) { - head = kmalloc(sizeof(struct fw_head), GFP_KERNEL); + u32 mask = 0xFFFFFFFF; + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + return -EINVAL; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + } + + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; - memset(head, 0, sizeof(*head)); + head->mask = mask; tcf_tree_lock(tp); tp->root = head; tcf_tree_unlock(tp); } - f = kmalloc(sizeof(struct fw_filter), GFP_KERNEL); + f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); if (f == NULL) return -ENOBUFS; - memset(f, 0, sizeof(*f)); f->id = handle; @@ -333,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char *b = skb->tail; struct rtattr *rta; @@ -354,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (strlen(f->indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); #endif /* CONFIG_NET_CLS_IND */ + if (head->mask != 0xFFFFFFFF) + RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 520ff716dab2..d3aea730d4c8 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,7 +10,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> @@ -397,10 +396,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, h1 = to_hash(nhandle); if ((b = head->table[h1]) == NULL) { err = -ENOBUFS; - b = kmalloc(sizeof(struct route4_bucket), GFP_KERNEL); + b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == NULL) goto errout; - memset(b, 0, sizeof(*b)); tcf_tree_lock(tp); head->table[h1] = b; @@ -476,20 +474,18 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, err = -ENOBUFS; if (head == NULL) { - head = kmalloc(sizeof(struct route4_head), GFP_KERNEL); + head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); if (head == NULL) goto errout; - memset(head, 0, sizeof(struct route4_head)); tcf_tree_lock(tp); tp->root = head; tcf_tree_unlock(tp); } - f = kmalloc(sizeof(struct route4_filter), GFP_KERNEL); + f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); if (f == NULL) goto errout; - memset(f, 0, sizeof(*f)); err = route4_set_parms(tp, base, f, handle, head, tb, tca[TCA_RATE-1], 1); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 572f06be3b02..6e230ecfba05 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -65,7 +65,6 @@ Well, as result, despite its simplicity, we get a pretty powerful classification engine. */ -#include <linux/config.h> struct rsvp_head { @@ -241,9 +240,8 @@ static int rsvp_init(struct tcf_proto *tp) { struct rsvp_head *data; - data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL); + data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); if (data) { - memset(data, 0, sizeof(struct rsvp_head)); tp->root = data; return 0; } @@ -447,11 +445,10 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout2; err = -ENOBUFS; - f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL); + f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL); if (f == NULL) goto errout2; - memset(f, 0, sizeof(*f)); h2 = 16; if (tb[TCA_RSVP_SRC-1]) { err = -EINVAL; @@ -533,10 +530,9 @@ insert: /* No session found. Create new one. */ err = -ENOBUFS; - s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL); + s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL); if (s == NULL) goto errout; - memset(s, 0, sizeof(*s)); memcpy(s->dst, dst, sizeof(s->dst)); if (pinfo) { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9f921174c8ab..5af8a59e1503 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -4,7 +4,6 @@ * Written 1998,1999 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -149,11 +148,10 @@ static int tcindex_init(struct tcf_proto *tp) struct tcindex_data *p; DPRINTK("tcindex_init(tp %p)\n",tp); - p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL); + p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL); if (!p) return -ENOMEM; - memset(p, 0, sizeof(*p)); p->mask = 0xffff; p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; @@ -297,16 +295,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, err = -ENOMEM; if (!cp.perfect && !cp.h) { if (valid_perfect_hash(&cp)) { - cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL); + cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); if (!cp.perfect) goto errout; - memset(cp.perfect, 0, cp.hash * sizeof(*r)); balloc = 1; } else { - cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL); + cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); if (!cp.h) goto errout; - memset(cp.h, 0, cp.hash * sizeof(f)); balloc = 2; } } @@ -317,10 +313,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, r = tcindex_lookup(&cp, handle) ? : &new_filter_result; if (r == &new_filter_result) { - f = kmalloc(sizeof(*f), GFP_KERNEL); + f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) goto errout_alloc; - memset(f, 0, sizeof(*f)); } if (tb[TCA_TCINDEX_CLASSID-1]) { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 78e052591fa9..0a6cfa0005be 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -33,7 +33,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -308,23 +307,21 @@ static int u32_init(struct tcf_proto *tp) if (tp_c->q == tp->q) break; - root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL); + root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; - memset(root_ht, 0, sizeof(*root_ht)); root_ht->divisor = 0; root_ht->refcnt++; root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; root_ht->prio = tp->prio; if (tp_c == NULL) { - tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL); + tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; } - memset(tp_c, 0, sizeof(*tp_c)); tp_c->q = tp->q; tp_c->next = u32_list; u32_list = tp_c; @@ -572,10 +569,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, if (handle == 0) return -ENOMEM; } - ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); + ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; - memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*)); ht->tp_c = tp_c; ht->refcnt = 0; ht->divisor = divisor; @@ -618,18 +614,16 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, s = RTA_DATA(tb[TCA_U32_SEL-1]); - n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); + n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); if (n == NULL) return -ENOBUFS; - memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key)); #ifdef CONFIG_CLS_U32_PERF - n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL); + n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL); if (n->pf == NULL) { kfree(n); return -ENOBUFS; } - memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64)); #endif memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); @@ -802,7 +796,7 @@ static int __init init_u32(void) { printk("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF - printk(" Perfomance counters on\n"); + printk(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_POLICE printk(" OLD policer on \n"); diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index bf1f00f8b1bf..8ed93c39b4ea 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 700844d49d79..61e3b740ab1a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -58,7 +58,6 @@ * only available if that subsytem is enabled in the kernel. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -774,10 +773,9 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len, TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX) goto errout; - meta = kmalloc(sizeof(*meta), GFP_KERNEL); + meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (meta == NULL) goto errout; - memset(meta, 0, sizeof(*meta)); memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left)); memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right)); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 71ea926a9f09..cc80babfd79f 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 77beabc91fa3..aa17d8f7c4c8 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 34e7e51e601e..e3ddfce0ac8d 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -12,7 +12,6 @@ * Based on net/sched/cls_u32.c */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5cb956b721e8..0fd0768a17c6 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -81,7 +81,6 @@ * open up a beer to watch the compilation going. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -322,10 +321,9 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta, list_len = RTA_PAYLOAD(rt_list); matches_len = tree_hdr->nmatches * sizeof(*em); - tree->matches = kmalloc(matches_len, GFP_KERNEL); + tree->matches = kzalloc(matches_len, GFP_KERNEL); if (tree->matches == NULL) goto errout; - memset(tree->matches, 0, matches_len); /* We do not use rtattr_parse_nested here because the maximum * number of attributes is unknown. This saves us the allocation diff --git a/net/sched/estimator.c b/net/sched/estimator.c index 5d3ae03e22a7..0ebc98e9be2d 100644 --- a/net/sched/estimator.c +++ b/net/sched/estimator.c @@ -139,11 +139,10 @@ int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct r if (parm->interval < -2 || parm->interval > 3) return -EINVAL; - est = kmalloc(sizeof(*est), GFP_KERNEL); + est = kzalloc(sizeof(*est), GFP_KERNEL); if (est == NULL) return -ENOBUFS; - memset(est, 0, sizeof(*est)); est->interval = parm->interval + 2; est->stats = stats; est->stats_lock = stats_lock; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 31570b9a6e9a..0b6489291140 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -15,7 +15,6 @@ * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -196,14 +195,14 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { if (q->handle == handle) { - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); return q; } } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); return NULL; } @@ -431,7 +430,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) } #endif - err = -EINVAL; + err = -ENOENT; if (ops == NULL) goto err_out; @@ -838,7 +837,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx > s_idx) s_q_idx = 0; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); q_idx = 0; list_for_each_entry(q, &dev->qdisc_list, list) { if (q_idx < s_q_idx) { @@ -847,12 +846,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) } if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) { - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); goto done; } q_idx++; } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); } done: @@ -1075,7 +1074,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && @@ -1097,7 +1096,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) break; t++; } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); cb->args[0] = t; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ac7cb60d1e25..dbf44da0912f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -3,7 +3,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 81f0b8346d17..cb0c456aa349 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -11,7 +11,6 @@ * Note: Quantum tunneling is not supported. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6cd81708bf71..bac881bfe362 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -10,7 +10,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -1927,10 +1926,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t } err = -ENOBUFS; - cl = kmalloc(sizeof(*cl), GFP_KERNEL); + cl = kzalloc(sizeof(*cl), GFP_KERNEL); if (cl == NULL) goto failure; - memset(cl, 0, sizeof(*cl)); cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f6320ca70493..11c8a2119b96 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -3,7 +3,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 033083bf0e74..c2689f4ba8de 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -9,7 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 74d4a1dceeec..88c6a99ce53c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -46,11 +45,10 @@ The idea is the following: - enqueue, dequeue are serialized via top level device spinlock dev->queue_lock. - - tree walking is protected by read_lock_bh(qdisc_tree_lock) + - tree walking is protected by read_lock(qdisc_tree_lock) and this lock is used only in process context. - - updates to tree are made under rtnl semaphore or - from softirq context (__qdisc_destroy rcu-callback) - hence this lock needs local bh disabling. + - updates to tree are made only under rtnl semaphore, + hence this lock may be made without local bh disabling. qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! */ @@ -58,14 +56,14 @@ DEFINE_RWLOCK(qdisc_tree_lock); void qdisc_lock_tree(struct net_device *dev) { - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); spin_lock_bh(&dev->queue_lock); } void qdisc_unlock_tree(struct net_device *dev) { spin_unlock_bh(&dev->queue_lock); - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } /* @@ -239,9 +237,7 @@ void __netdev_watchdog_up(struct net_device *dev) static void dev_watchdog_up(struct net_device *dev) { - netif_tx_lock_bh(dev); __netdev_watchdog_up(dev); - netif_tx_unlock_bh(dev); } static void dev_watchdog_down(struct net_device *dev) @@ -433,10 +429,9 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); - p = kmalloc(size, GFP_KERNEL); + p = kzalloc(size, GFP_KERNEL); if (!p) goto errout; - memset(p, 0, size); sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; @@ -487,20 +482,6 @@ void qdisc_reset(struct Qdisc *qdisc) static void __qdisc_destroy(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); - struct Qdisc_ops *ops = qdisc->ops; - -#ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif - write_lock(&qdisc_tree_lock); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - write_unlock(&qdisc_tree_lock); - module_put(ops->owner); - - dev_put(qdisc->dev); kfree((char *) qdisc - qdisc->padded); } @@ -508,32 +489,23 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - struct list_head cql = LIST_HEAD_INIT(cql); - struct Qdisc *cq, *q, *n; + struct Qdisc_ops *ops = qdisc->ops; if (qdisc->flags & TCQ_F_BUILTIN || - !atomic_dec_and_test(&qdisc->refcnt)) + !atomic_dec_and_test(&qdisc->refcnt)) return; - if (!list_empty(&qdisc->list)) { - if (qdisc->ops->cl_ops == NULL) - list_del(&qdisc->list); - else - list_move(&qdisc->list, &cql); - } - - /* unlink inner qdiscs from dev->qdisc_list immediately */ - list_for_each_entry(cq, &cql, list) - list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list) - if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) { - if (q->ops->cl_ops == NULL) - list_del_init(&q->list); - else - list_move_tail(&q->list, &cql); - } - list_for_each_entry_safe(cq, n, &cql, list) - list_del_init(&cq->list); + list_del(&qdisc->list); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); +#endif + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + module_put(ops->owner); + dev_put(qdisc->dev); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -553,15 +525,15 @@ void dev_activate(struct net_device *dev) printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); list_add_tail(&qdisc->list, &dev->qdisc_list); - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } else { qdisc = &noqueue_qdisc; } - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); dev->qdisc_sleeping = qdisc; - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } if (!netif_carrier_ok(dev)) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 29a2dd9f3029..18e81a8ffb01 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -18,7 +18,6 @@ * For all the glorious comments look at include/net/red.h */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -407,10 +406,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched_data *q; if (table->tab[dp] == NULL) { - table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL); + table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); if (table->tab[dp] == NULL) return -ENOMEM; - memset(table->tab[dp], 0, sizeof(*q)); } q = table->tab[dp]; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index f1c7bd29f2cd..6a6735a2ed35 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -50,7 +50,6 @@ */ #include <linux/kernel.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> @@ -1124,10 +1123,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (rsc == NULL && fsc == NULL) return -EINVAL; - cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL); + cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL); if (cl == NULL) return -ENOBUFS; - memset(cl, 0, sizeof(struct hfsc_class)); if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3ec95df4a85e..6c058e3660c0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1,4 +1,4 @@ -/* vim: ts=8 sw=8 +/* * net/sched/sch_htb.c Hierarchical token bucket, feed tree version * * This program is free software; you can redistribute it and/or @@ -27,7 +27,6 @@ * * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -69,218 +68,165 @@ one less than their parent. */ -#define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ -#define HTB_RATECM 1 /* whether to use rate computer */ -#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ -#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) -#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) -#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ +#define HTB_HSIZE 16 /* classid hash size */ +#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ +#define HTB_RATECM 1 /* whether to use rate computer */ +#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ +#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif -/* debugging support; S is subsystem, these are defined: - 0 - netlink messages - 1 - enqueue - 2 - drop & requeue - 3 - dequeue main - 4 - dequeue one prio DRR part - 5 - dequeue class accounting - 6 - class overlimit status computation - 7 - hint tree - 8 - event queue - 10 - rate estimator - 11 - classifier - 12 - fast dequeue cache - - L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full - q->debug uint32 contains 16 2-bit fields one for subsystem starting - from LSB - */ -#ifdef HTB_DEBUG -#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L) -#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \ - printk(KERN_DEBUG FMT,##ARG) -#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC) -#define HTB_PASSQ q, -#define HTB_ARGQ struct htb_sched *q, -#define static -#undef __inline__ -#define __inline__ -#undef inline -#define inline -#define HTB_CMAGIC 0xFEFAFEF1 -#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \ - if ((N)->rb_color == -1) break; \ - rb_erase(N,R); \ - (N)->rb_color = -1; } while (0) -#else -#define HTB_DBG_COND(S,L) (0) -#define HTB_DBG(S,L,FMT,ARG...) -#define HTB_PASSQ -#define HTB_ARGQ -#define HTB_CHCL(cl) -#define htb_safe_rb_erase(N,R) rb_erase(N,R) -#endif - - /* used internaly to keep status of single class */ enum htb_cmode { - HTB_CANT_SEND, /* class can't send and can't borrow */ - HTB_MAY_BORROW, /* class can't send but may borrow */ - HTB_CAN_SEND /* class can send */ + HTB_CANT_SEND, /* class can't send and can't borrow */ + HTB_MAY_BORROW, /* class can't send but may borrow */ + HTB_CAN_SEND /* class can send */ }; /* interior & leaf nodes; props specific to leaves are marked L: */ -struct htb_class -{ -#ifdef HTB_DEBUG - unsigned magic; -#endif - /* general class parameters */ - u32 classid; - struct gnet_stats_basic bstats; - struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; - struct tc_htb_xstats xstats;/* our special stats */ - int refcnt; /* usage count of this class */ +struct htb_class { + /* general class parameters */ + u32 classid; + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct tc_htb_xstats xstats; /* our special stats */ + int refcnt; /* usage count of this class */ #ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes,sum_bytes; - unsigned long rate_packets,sum_packets; + /* rate measurement counters */ + unsigned long rate_bytes, sum_bytes; + unsigned long rate_packets, sum_packets; #endif - /* topology */ - int level; /* our level (see above) */ - struct htb_class *parent; /* parent class */ - struct list_head hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ - - union { - struct htb_class_leaf { - struct Qdisc *q; - int prio; - int aprio; - int quantum; - int deficit[TC_HTB_MAXDEPTH]; - struct list_head drop_list; - } leaf; - struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - parent's feed then we lost ptr value and start from the - first child again. Here we store classid of the - last valid ptr (used when ptr is NULL). */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; - } inner; - } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - unsigned long pq_key; /* the same type as jiffies global */ - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ - - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; - - int warned; /* only one warning about non work conserving .. */ - - /* token bucket parameters */ - struct qdisc_rate_table *rate; /* rate table of the class itself */ - struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ - long buffer,cbuffer; /* token bucket depth/rate */ - long mbuffer; /* max wait time */ - long tokens,ctokens; /* current number of tokens */ - psched_time_t t_c; /* checkpoint time */ + /* topology */ + int level; /* our level (see above) */ + struct htb_class *parent; /* parent class */ + struct hlist_node hlist; /* classid hash list item */ + struct list_head sibling; /* sibling list item */ + struct list_head children; /* children list */ + + union { + struct htb_class_leaf { + struct Qdisc *q; + int prio; + int aprio; + int quantum; + int deficit[TC_HTB_MAXDEPTH]; + struct list_head drop_list; + } leaf; + struct htb_class_inner { + struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ + struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ + /* When class changes from state 1->2 and disconnects from + parent's feed then we lost ptr value and start from the + first child again. Here we store classid of the + last valid ptr (used when ptr is NULL). */ + u32 last_ptr_id[TC_HTB_NUMPRIO]; + } inner; + } un; + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + struct rb_node pq_node; /* node for event queue */ + unsigned long pq_key; /* the same type as jiffies global */ + + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + + /* class attached filters */ + struct tcf_proto *filter_list; + int filter_cnt; + + int warned; /* only one warning about non work conserving .. */ + + /* token bucket parameters */ + struct qdisc_rate_table *rate; /* rate table of the class itself */ + struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ + long buffer, cbuffer; /* token bucket depth/rate */ + psched_tdiff_t mbuffer; /* max wait time */ + long tokens, ctokens; /* current number of tokens */ + psched_time_t t_c; /* checkpoint time */ }; /* TODO: maybe compute rate when size is too large .. or drop ? */ -static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate, - int size) -{ - int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } - return rate->data[slot]; +static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, + int size) +{ + int slot = size >> rate->rate.cell_log; + if (slot > 255) { + cl->xstats.giants++; + slot = 255; + } + return rate->data[slot]; } -struct htb_sched -{ - struct list_head root; /* root classes list */ - struct list_head hash[HTB_HSIZE]; /* hashed by classid */ - struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; - - /* time of nearest event per level (row) */ - unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; - - /* cached value of jiffies in dequeue */ - unsigned long jiffies; - - /* whether we hit non-work conserving class during this dequeue; we use */ - int nwc_hit; /* this to disable mindelay complaint in dequeue */ - - int defcls; /* class where unclassified flows go to */ - u32 debug; /* subsystem debug levels */ - - /* filters for qdisc itself */ - struct tcf_proto *filter_list; - int filter_cnt; - - int rate2quantum; /* quant = rate / rate2quantum */ - psched_time_t now; /* cached dequeue time */ - struct timer_list timer; /* send delay timer */ +struct htb_sched { + struct list_head root; /* root classes list */ + struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ + struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ + + /* self list - roots of self generating tree */ + struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int row_mask[TC_HTB_MAXDEPTH]; + struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + + /* self wait list - roots of wait PQs per row */ + struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + + /* time of nearest event per level (row) */ + unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; + + /* cached value of jiffies in dequeue */ + unsigned long jiffies; + + /* whether we hit non-work conserving class during this dequeue; we use */ + int nwc_hit; /* this to disable mindelay complaint in dequeue */ + + int defcls; /* class where unclassified flows go to */ + + /* filters for qdisc itself */ + struct tcf_proto *filter_list; + int filter_cnt; + + int rate2quantum; /* quant = rate / rate2quantum */ + psched_time_t now; /* cached dequeue time */ + struct timer_list timer; /* send delay timer */ #ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ + struct timer_list rttim; /* rate computer timer */ + int recmp_bucket; /* which hash bucket to recompute next */ #endif - - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ - long direct_pkts; + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + int direct_qlen; /* max qlen of above */ + + long direct_pkts; }; /* compute hash of size HTB_HSIZE for given handle */ -static __inline__ int htb_hash(u32 h) +static inline int htb_hash(u32 h) { #if HTB_HSIZE != 16 - #error "Declare new hash for your HTB_HSIZE" +#error "Declare new hash for your HTB_HSIZE" #endif - h ^= h>>8; /* stolen from cbq_hash */ - h ^= h>>4; - return h & 0xf; + h ^= h >> 8; /* stolen from cbq_hash */ + h ^= h >> 4; + return h & 0xf; } /* find class in global hash table using given handle */ -static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch) +static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct list_head *p; - if (TC_H_MAJ(handle) != sch->handle) + struct hlist_node *p; + struct htb_class *cl; + + if (TC_H_MAJ(handle) != sch->handle) return NULL; - - list_for_each (p,q->hash+htb_hash(handle)) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + + hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { if (cl->classid == handle) return cl; } @@ -305,7 +251,8 @@ static inline u32 htb_classid(struct htb_class *cl) return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; } -static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) +static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; @@ -317,8 +264,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in note that nfmark can be used too by attaching filter fw with no rules in it */ if (skb->priority == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) selected */ - if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) + return HTB_DIRECT; /* X:0 (direct flow) selected */ + if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) return cl; *qerr = NET_XMIT_BYPASS; @@ -327,7 +274,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: - case TC_ACT_STOLEN: + case TC_ACT_STOLEN: *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; @@ -336,97 +283,44 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in if (result == TC_POLICE_SHOT) return HTB_DIRECT; #endif - if ((cl = (void*)res.class) == NULL) { + if ((cl = (void *)res.class) == NULL) { if (res.classid == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) */ - if ((cl = htb_find(res.classid,sch)) == NULL) - break; /* filter selected invalid classid */ + return HTB_DIRECT; /* X:0 (direct flow) */ + if ((cl = htb_find(res.classid, sch)) == NULL) + break; /* filter selected invalid classid */ } if (!cl->level) - return cl; /* we hit leaf; return it */ + return cl; /* we hit leaf; return it */ /* we have got inner class; apply inner filter chain */ tcf = cl->filter_list; } /* classification failed; try to use default class */ - cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch); + cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); if (!cl || cl->level) - return HTB_DIRECT; /* bad default .. this is safe bet */ + return HTB_DIRECT; /* bad default .. this is safe bet */ return cl; } -#ifdef HTB_DEBUG -static void htb_next_rb_node(struct rb_node **n); -#define HTB_DUMTREE(root,memb) if(root) { \ - struct rb_node *n = (root)->rb_node; \ - while (n->rb_left) n = n->rb_left; \ - while (n) { \ - struct htb_class *cl = rb_entry(n, struct htb_class, memb); \ - printk(" %x",cl->classid); htb_next_rb_node (&n); \ - } } - -static void htb_debug_dump (struct htb_sched *q) -{ - int i,p; - printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); - /* rows */ - for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { - printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); - for (p=0;p<TC_HTB_NUMPRIO;p++) { - if (!q->row[i][p].rb_node) continue; - printk(" p%d:",p); - HTB_DUMTREE(q->row[i]+p,node[p]); - } - printk("\n"); - } - /* classes */ - for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *l; - list_for_each (l,q->hash+i) { - struct htb_class *cl = list_entry(l,struct htb_class,hlist); - long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); - printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d " - "pa=%x f:", - cl->classid,cl->cmode,cl->tokens,cl->ctokens, - cl->pq_node.rb_color==-1?0:cl->pq_key,diff, - cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity); - if (cl->level) - for (p=0;p<TC_HTB_NUMPRIO;p++) { - if (!cl->un.inner.feed[p].rb_node) continue; - printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0); - HTB_DUMTREE(cl->un.inner.feed+p,node[p]); - } - printk("\n"); - } - } -} -#endif /** * htb_add_to_id_tree - adds class to the round robin list * * Routine adds class to the list (actually tree) sorted by classid. * Make sure that class is not already on such list for given prio. */ -static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, - struct htb_class *cl,int prio) +static void htb_add_to_id_tree(struct rb_root *root, + struct htb_class *cl, int prio) { struct rb_node **p = &root->rb_node, *parent = NULL; - HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio); -#ifdef HTB_DEBUG - if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; } - HTB_CHCL(cl); - if (*p) { - struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]); - HTB_CHCL(x); - } -#endif + while (*p) { - struct htb_class *c; parent = *p; + struct htb_class *c; + parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - HTB_CHCL(c); + if (cl->classid > c->classid) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->node[prio], parent, p); @@ -440,17 +334,11 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, * change its mode in cl->pq_key microseconds. Make sure that class is not * already in the queue. */ -static void htb_add_to_wait_tree (struct htb_sched *q, - struct htb_class *cl,long delay,int debug_hint) +static void htb_add_to_wait_tree(struct htb_sched *q, + struct htb_class *cl, long delay) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; - HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key); -#ifdef HTB_DEBUG - if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; } - HTB_CHCL(cl); - if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) - printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); -#endif + cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); if (cl->pq_key == q->jiffies) cl->pq_key++; @@ -458,13 +346,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q, /* update the nearest event cache */ if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) q->near_ev_cache[cl->level] = cl->pq_key; - + while (*p) { - struct htb_class *c; parent = *p; + struct htb_class *c; + parent = *p; c = rb_entry(parent, struct htb_class, pq_node); if (time_after_eq(cl->pq_key, c->pq_key)) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); @@ -477,7 +366,7 @@ static void htb_add_to_wait_tree (struct htb_sched *q, * When we are past last key we return NULL. * Average complexity is 2 steps per call. */ -static void htb_next_rb_node(struct rb_node **n) +static inline void htb_next_rb_node(struct rb_node **n) { *n = rb_next(*n); } @@ -488,42 +377,51 @@ static void htb_next_rb_node(struct rb_node **n) * The class is added to row at priorities marked in mask. * It does nothing if mask == 0. */ -static inline void htb_add_class_to_row(struct htb_sched *q, - struct htb_class *cl,int mask) +static inline void htb_add_class_to_row(struct htb_sched *q, + struct htb_class *cl, int mask) { - HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n", - cl->classid,mask,q->row_mask[cl->level]); - HTB_CHCL(cl); q->row_mask[cl->level] |= mask; while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio); + htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); + } +} + +/* If this triggers, it is a bug in this code, but it need not be fatal */ +static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) +{ + if (!RB_EMPTY_NODE(rb)) { + WARN_ON(1); + } else { + rb_erase(rb, root); + RB_CLEAR_NODE(rb); } } + /** * htb_remove_class_from_row - removes class from its row * * The class is removed from row at priorities marked in mask. * It does nothing if mask == 0. */ -static __inline__ void htb_remove_class_from_row(struct htb_sched *q, - struct htb_class *cl,int mask) +static inline void htb_remove_class_from_row(struct htb_sched *q, + struct htb_class *cl, int mask) { int m = 0; - HTB_CHCL(cl); + while (mask) { int prio = ffz(~mask); + mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node+prio) - htb_next_rb_node(q->ptr[cl->level]+prio); - htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio); - if (!q->row[cl->level][prio].rb_node) + if (q->ptr[cl->level][prio] == cl->node + prio) + htb_next_rb_node(q->ptr[cl->level] + prio); + + htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); + if (!q->row[cl->level][prio].rb_node) m |= 1 << prio; } - HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n", - cl->classid,mask,q->row_mask[cl->level],m); q->row_mask[cl->level] &= ~m; } @@ -534,34 +432,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q, * for priorities it is participating on. cl->cmode must be new * (activated) mode. It does nothing if cl->prio_activity == 0. */ -static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) +static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; - long m,mask = cl->prio_activity; - HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); - HTB_CHCL(cl); + long m, mask = cl->prio_activity; while (cl->cmode == HTB_MAY_BORROW && p && mask) { - HTB_CHCL(p); - m = mask; while (m) { + m = mask; + while (m) { int prio = ffz(~m); m &= ~(1 << prio); - + if (p->un.inner.feed[prio].rb_node) /* parent already has its feed in use so that reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - - htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio); + + htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); } - HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", - p->classid,p->prio_activity,mask,p->cmode); p->prio_activity |= mask; - cl = p; p = cl->parent; - HTB_CHCL(cl); + cl = p; + p = cl->parent; + } if (cl->cmode == HTB_CAN_SEND && mask) - htb_add_class_to_row(q,cl,mask); + htb_add_class_to_row(q, cl, mask); } /** @@ -574,39 +469,52 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; - long m,mask = cl->prio_activity; - HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); - HTB_CHCL(cl); + long m, mask = cl->prio_activity; while (cl->cmode == HTB_MAY_BORROW && p && mask) { - m = mask; mask = 0; + m = mask; + mask = 0; while (m) { int prio = ffz(~m); m &= ~(1 << prio); - - if (p->un.inner.ptr[prio] == cl->node+prio) { + + if (p->un.inner.ptr[prio] == cl->node + prio) { /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ p->un.inner.last_ptr_id[prio] = cl->classid; p->un.inner.ptr[prio] = NULL; } - - htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); - - if (!p->un.inner.feed[prio].rb_node) + + htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); + + if (!p->un.inner.feed[prio].rb_node) mask |= 1 << prio; } - HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", - p->classid,p->prio_activity,mask,p->cmode); + p->prio_activity &= ~mask; - cl = p; p = cl->parent; - HTB_CHCL(cl); + cl = p; + p = cl->parent; + } - if (cl->cmode == HTB_CAN_SEND && mask) - htb_remove_class_from_row(q,cl,mask); + if (cl->cmode == HTB_CAN_SEND && mask) + htb_remove_class_from_row(q, cl, mask); } +#if HTB_HYSTERESIS +static inline long htb_lowater(const struct htb_class *cl) +{ + return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; +} +static inline long htb_hiwater(const struct htb_class *cl) +{ + return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; +} +#else +#define htb_lowater(cl) (0) +#define htb_hiwater(cl) (0) +#endif + /** * htb_class_mode - computes and returns current class mode * @@ -618,28 +526,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) * 0 .. -cl->{c,}buffer range. It is meant to limit number of * mode transitions per time unit. The speed gain is about 1/6. */ -static __inline__ enum htb_cmode -htb_class_mode(struct htb_class *cl,long *diff) +static inline enum htb_cmode +htb_class_mode(struct htb_class *cl, long *diff) { - long toks; + long toks; - if ((toks = (cl->ctokens + *diff)) < ( -#if HTB_HYSTERESIS - cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : -#endif - 0)) { - *diff = -toks; - return HTB_CANT_SEND; - } - if ((toks = (cl->tokens + *diff)) >= ( -#if HTB_HYSTERESIS - cl->cmode == HTB_CAN_SEND ? -cl->buffer : -#endif - 0)) - return HTB_CAN_SEND; + if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { + *diff = -toks; + return HTB_CANT_SEND; + } + + if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) + return HTB_CAN_SEND; - *diff = -toks; - return HTB_MAY_BORROW; + *diff = -toks; + return HTB_MAY_BORROW; } /** @@ -651,24 +552,21 @@ htb_class_mode(struct htb_class *cl,long *diff) * be different from old one and cl->pq_key has to be valid if changing * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). */ -static void +static void htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) -{ - enum htb_cmode new_mode = htb_class_mode(cl,diff); - - HTB_CHCL(cl); - HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid); +{ + enum htb_cmode new_mode = htb_class_mode(cl, diff); if (new_mode == cl->cmode) - return; - - if (cl->prio_activity) { /* not necessary: speed optimization */ - if (cl->cmode != HTB_CANT_SEND) - htb_deactivate_prios(q,cl); + return; + + if (cl->prio_activity) { /* not necessary: speed optimization */ + if (cl->cmode != HTB_CANT_SEND) + htb_deactivate_prios(q, cl); cl->cmode = new_mode; - if (new_mode != HTB_CANT_SEND) - htb_activate_prios(q,cl); - } else + if (new_mode != HTB_CANT_SEND) + htb_activate_prios(q, cl); + } else cl->cmode = new_mode; } @@ -679,14 +577,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) * for the prio. It can be called on already active leaf safely. * It also adds leaf into droplist. */ -static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) +static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); - HTB_CHCL(cl); + if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); - htb_activate_prios(q,cl); - list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio); + htb_activate_prios(q, cl); + list_add_tail(&cl->un.leaf.drop_list, + q->drops + cl->un.leaf.aprio); } } @@ -696,120 +595,120 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) * Make sure that leaf is active. In the other words it can't be called * with non-active leaf. It also removes class from the drop list. */ -static __inline__ void -htb_deactivate(struct htb_sched *q,struct htb_class *cl) +static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { BUG_TRAP(cl->prio_activity); - HTB_CHCL(cl); - htb_deactivate_prios(q,cl); + + htb_deactivate_prios(q, cl); cl->prio_activity = 0; list_del_init(&cl->un.leaf.drop_list); } static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb,sch,&ret); - - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); - q->direct_pkts++; - } else { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_DROP; - } + int ret; + struct htb_sched *q = qdisc_priv(sch); + struct htb_class *cl = htb_classify(skb, sch, &ret); + + if (cl == HTB_DIRECT) { + /* enqueue to helper queue */ + if (q->direct_queue.qlen < q->direct_qlen) { + __skb_queue_tail(&q->direct_queue, skb); + q->direct_pkts++; + } else { + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_DROP; + } #ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret == NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb (skb); - return ret; + } else if (!cl) { + if (ret == NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; #endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; - } else { - cl->bstats.packets++; cl->bstats.bytes += skb->len; - htb_activate (q,cl); - } - - sch->q.qlen++; - sch->bstats.packets++; sch->bstats.bytes += skb->len; - HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); - return NET_XMIT_SUCCESS; + } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != + NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else { + cl->bstats.packets++; + cl->bstats.bytes += skb->len; + htb_activate(q, cl); + } + + sch->q.qlen++; + sch->bstats.packets++; + sch->bstats.bytes += skb->len; + return NET_XMIT_SUCCESS; } /* TODO: requeuing packet charges it to policers again !! */ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) { - struct htb_sched *q = qdisc_priv(sch); - int ret = NET_XMIT_SUCCESS; - struct htb_class *cl = htb_classify(skb,sch, &ret); - struct sk_buff *tskb; - - if (cl == HTB_DIRECT || !cl) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen && cl) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb (tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; - } else - htb_activate (q,cl); - - sch->q.qlen++; - sch->qstats.requeues++; - HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); - return NET_XMIT_SUCCESS; + struct htb_sched *q = qdisc_priv(sch); + int ret = NET_XMIT_SUCCESS; + struct htb_class *cl = htb_classify(skb, sch, &ret); + struct sk_buff *tskb; + + if (cl == HTB_DIRECT || !cl) { + /* enqueue to helper queue */ + if (q->direct_queue.qlen < q->direct_qlen && cl) { + __skb_queue_head(&q->direct_queue, skb); + } else { + __skb_queue_head(&q->direct_queue, skb); + tskb = __skb_dequeue_tail(&q->direct_queue); + kfree_skb(tskb); + sch->qstats.drops++; + return NET_XMIT_CN; + } + } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != + NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else + htb_activate(q, cl); + + sch->q.qlen++; + sch->qstats.requeues++; + return NET_XMIT_SUCCESS; } static void htb_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc*)arg; - sch->flags &= ~TCQ_F_THROTTLED; - wmb(); - netif_schedule(sch->dev); + struct Qdisc *sch = (struct Qdisc *)arg; + sch->flags &= ~TCQ_F_THROTTLED; + wmb(); + netif_schedule(sch->dev); } #ifdef HTB_RATECM #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 static void htb_rate_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc*)arg; + struct Qdisc *sch = (struct Qdisc *)arg; struct htb_sched *q = qdisc_priv(sch); - struct list_head *p; + struct hlist_node *p; + struct htb_class *cl; + /* lock queue so that we can muck with it */ - HTB_QLOCK(sch); - HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies); + spin_lock_bh(&sch->dev->queue_lock); q->rttim.expires = jiffies + HZ; add_timer(&q->rttim); /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) + if (++q->recmp_bucket >= HTB_HSIZE) q->recmp_bucket = 0; - list_for_each (p,q->hash+q->recmp_bucket) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); - HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n", - cl->classid,cl->sum_bytes,cl->sum_packets); - RT_GEN (cl->sum_bytes,cl->rate_bytes); - RT_GEN (cl->sum_packets,cl->rate_packets); + + hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { + RT_GEN(cl->sum_bytes, cl->rate_bytes); + RT_GEN(cl->sum_packets, cl->rate_packets); } - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); } #endif @@ -824,12 +723,11 @@ static void htb_rate_timer(unsigned long arg) * CAN_SEND) because we can use more precise clock that event queue here. * In such case we remove class from event queue first. */ -static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, - int level,int bytes) -{ - long toks,diff; +static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, + int level, int bytes) +{ + long toks, diff; enum htb_cmode old_mode; - HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes); #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ if (toks > cl->B) toks = cl->B; \ @@ -838,47 +736,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, cl->T = toks while (cl) { - HTB_CHCL(cl); - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); -#ifdef HTB_DEBUG - if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { - if (net_ratelimit()) - printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", - cl->classid, diff, -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - q->now.tv_sec * 1000000ULL + q->now.tv_usec, - cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, -#else - (unsigned long long) q->now, - (unsigned long long) cl->t_c, -#endif - q->jiffies); - diff = 1000; - } -#endif + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); if (cl->level >= level) { - if (cl->level == level) cl->xstats.lends++; - HTB_ACCNT (tokens,buffer,rate); + if (cl->level == level) + cl->xstats.lends++; + HTB_ACCNT(tokens, buffer, rate); } else { cl->xstats.borrows++; - cl->tokens += diff; /* we moved t_c; update tokens */ + cl->tokens += diff; /* we moved t_c; update tokens */ } - HTB_ACCNT (ctokens,cbuffer,ceil); + HTB_ACCNT(ctokens, cbuffer, ceil); cl->t_c = q->now; - HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens); - old_mode = cl->cmode; diff = 0; - htb_change_class_mode(q,cl,&diff); + old_mode = cl->cmode; + diff = 0; + htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff,1); + htb_add_to_wait_tree(q, cl, diff); } - #ifdef HTB_RATECM /* update rate counters */ - cl->sum_bytes += bytes; cl->sum_packets++; + cl->sum_bytes += bytes; + cl->sum_packets++; #endif /* update byte stats except for leaves which are already updated */ @@ -897,60 +779,46 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Aplied are events whose have cl->pq_key <= jiffies. */ -static long htb_do_events(struct htb_sched *q,int level) +static long htb_do_events(struct htb_sched *q, int level) { int i; - HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n", - level,q->wait_pq[level].rb_node,q->row_mask[level]); + for (i = 0; i < 500; i++) { struct htb_class *cl; long diff; struct rb_node *p = q->wait_pq[level].rb_node; - if (!p) return 0; - while (p->rb_left) p = p->rb_left; + if (!p) + return 0; + while (p->rb_left) + p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { - HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); return cl->pq_key - q->jiffies; } - htb_safe_rb_erase(p,q->wait_pq+level); - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); -#ifdef HTB_DEBUG - if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { - if (net_ratelimit()) - printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", - cl->classid, diff, -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - q->now.tv_sec * 1000000ULL + q->now.tv_usec, - cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, -#else - (unsigned long long) q->now, - (unsigned long long) cl->t_c, -#endif - q->jiffies); - diff = 1000; - } -#endif - htb_change_class_mode(q,cl,&diff); + htb_safe_rb_erase(p, q->wait_pq + level); + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); + htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff,2); + htb_add_to_wait_tree(q, cl, diff); } if (net_ratelimit()) printk(KERN_WARNING "htb: too many events !\n"); - return HZ/10; + return HZ / 10; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL is no such one exists. */ -static struct rb_node * -htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) +static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, + u32 id) { struct rb_node *r = NULL; while (n) { - struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); - if (id == cl->classid) return n; - + struct htb_class *cl = + rb_entry(n, struct htb_class, node[prio]); + if (id == cl->classid) + return n; + if (id > cl->classid) { n = n->rb_right; } else { @@ -966,49 +834,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) * * Find leaf where current feed pointers points to. */ -static struct htb_class * -htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) +static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, + struct rb_node **pptr, u32 * pid) { int i; struct { struct rb_node *root; struct rb_node **pptr; u32 *pid; - } stk[TC_HTB_MAXDEPTH],*sp = stk; - + } stk[TC_HTB_MAXDEPTH], *sp = stk; + BUG_TRAP(tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; for (i = 0; i < 65535; i++) { - HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); - - if (!*sp->pptr && *sp->pid) { + if (!*sp->pptr && *sp->pid) { /* ptr was invalidated but id is valid - try to recover the original or next ptr */ - *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); + *sp->pptr = + htb_id_find_next_upper(prio, sp->root, *sp->pid); } - *sp->pid = 0; /* ptr is valid now so that remove this hint as it - can become out of date quickly */ - if (!*sp->pptr) { /* we are at right end; rewind & go up */ + *sp->pid = 0; /* ptr is valid now so that remove this hint as it + can become out of date quickly */ + if (!*sp->pptr) { /* we are at right end; rewind & go up */ *sp->pptr = sp->root; - while ((*sp->pptr)->rb_left) + while ((*sp->pptr)->rb_left) *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL; - htb_next_rb_node (sp->pptr); + BUG_TRAP(*sp->pptr); + if (!*sp->pptr) + return NULL; + htb_next_rb_node(sp->pptr); } } else { struct htb_class *cl; - cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); - HTB_CHCL(cl); - if (!cl->level) + cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); + if (!cl->level) return cl; (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr+prio; - sp->pid = cl->un.inner.last_ptr_id+prio; + sp->pptr = cl->un.inner.ptr + prio; + sp->pid = cl->un.inner.last_ptr_id + prio; } } BUG_TRAP(0); @@ -1017,21 +885,21 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 /* dequeues packet at given priority and level; call only if you are sure that there is active class at prio/level */ -static struct sk_buff * -htb_dequeue_tree(struct htb_sched *q,int prio,int level) +static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, + int level) { struct sk_buff *skb = NULL; - struct htb_class *cl,*start; + struct htb_class *cl, *start; /* look initial class up in the row */ - start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, - q->ptr[level]+prio,q->last_ptr_id[level]+prio); - + start = cl = htb_lookup_leaf(q->row[level] + prio, prio, + q->ptr[level] + prio, + q->last_ptr_id[level] + prio); + do { next: - BUG_TRAP(cl); - if (!cl) return NULL; - HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", - prio,level,cl->classid,cl->un.leaf.deficit[level]); + BUG_TRAP(cl); + if (!cl) + return NULL; /* class can be empty - it is unlikely but can be true if leaf qdisc drops packets in enqueue routine or if someone used @@ -1039,64 +907,69 @@ next: simply deactivate and skip such class */ if (unlikely(cl->un.leaf.q->q.qlen == 0)) { struct htb_class *next; - htb_deactivate(q,cl); + htb_deactivate(q, cl); /* row/level might become empty */ if ((q->row_mask[level] & (1 << prio)) == 0) - return NULL; - - next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, - prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); + return NULL; + + next = htb_lookup_leaf(q->row[level] + prio, + prio, q->ptr[level] + prio, + q->last_ptr_id[level] + prio); - if (cl == start) /* fix start if we just deleted it */ + if (cl == start) /* fix start if we just deleted it */ start = next; cl = next; goto next; } - - if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) + + skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); + if (likely(skb != NULL)) break; if (!cl->warned) { - printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid); + printk(KERN_WARNING + "htb: class %X isn't work conserving ?!\n", + cl->classid); cl->warned = 1; } q->nwc_hit++; - htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); - cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, - q->last_ptr_id[level]+prio); + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> + ptr[0]) + prio); + cl = htb_lookup_leaf(q->row[level] + prio, prio, + q->ptr[level] + prio, + q->last_ptr_id[level] + prio); } while (cl != start); if (likely(skb != NULL)) { if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { - HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n", - level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum); cl->un.leaf.deficit[level] += cl->un.leaf.quantum; - htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> + ptr[0]) + prio); } /* this used to be after charge_class but this constelation gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) - htb_deactivate (q,cl); - htb_charge_class (q,cl,level,skb->len); + htb_deactivate(q, cl); + htb_charge_class(q, cl, level, skb->len); } return skb; } -static void htb_delay_by(struct Qdisc *sch,long delay) +static void htb_delay_by(struct Qdisc *sch, long delay) { struct htb_sched *q = qdisc_priv(sch); - if (delay <= 0) delay = 1; - if (unlikely(delay > 5*HZ)) { + if (delay <= 0) + delay = 1; + if (unlikely(delay > 5 * HZ)) { if (net_ratelimit()) printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); - delay = 5*HZ; + delay = 5 * HZ; } /* why don't use jiffies here ? because expires can be in past */ mod_timer(&q->timer, q->jiffies + delay); sch->flags |= TCQ_F_THROTTLED; sch->qstats.overlimits++; - HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay); } static struct sk_buff *htb_dequeue(struct Qdisc *sch) @@ -1105,22 +978,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; long min_delay; -#ifdef HTB_DEBUG - int evs_used = 0; -#endif q->jiffies = jiffies; - HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), - sch->q.qlen); /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { + skb = __skb_dequeue(&q->direct_queue); + if (skb != NULL) { sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } - if (!sch->q.qlen) goto fin; + if (!sch->q.qlen) + goto fin; PSCHED_GET_TIME(q->now); min_delay = LONG_MAX; @@ -1130,21 +1000,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) int m; long delay; if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { - delay = htb_do_events(q,level); - q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); -#ifdef HTB_DEBUG - evs_used++; -#endif + delay = htb_do_events(q, level); + q->near_ev_cache[level] = + q->jiffies + (delay ? delay : HZ); } else - delay = q->near_ev_cache[level] - q->jiffies; - - if (delay && min_delay > delay) + delay = q->near_ev_cache[level] - q->jiffies; + + if (delay && min_delay > delay) min_delay = delay; m = ~q->row_mask[level]; while (m != (int)(-1)) { - int prio = ffz (m); + int prio = ffz(m); m |= 1 << prio; - skb = htb_dequeue_tree(q,prio,level); + skb = htb_dequeue_tree(q, prio, level); if (likely(skb != NULL)) { sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; @@ -1152,40 +1020,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } } -#ifdef HTB_DEBUG - if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { - if (min_delay == LONG_MAX) { - printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", - evs_used,q->jiffies,jiffies); - htb_debug_dump(q); - } else - printk(KERN_WARNING "HTB: mindelay=%ld, some class has " - "too small rate\n",min_delay); - } -#endif - htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); + htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay); fin: - HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); return skb; } /* try to drop from each class (by prio) until one succeed */ -static unsigned int htb_drop(struct Qdisc* sch) +static unsigned int htb_drop(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); int prio; for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { struct list_head *p; - list_for_each (p,q->drops+prio) { + list_for_each(p, q->drops + prio) { struct htb_class *cl = list_entry(p, struct htb_class, un.leaf.drop_list); unsigned int len; - if (cl->un.leaf.q->ops->drop && - (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + if (cl->un.leaf.q->ops->drop && + (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) - htb_deactivate (q,cl); + htb_deactivate(q, cl); return len; } } @@ -1195,29 +1051,25 @@ static unsigned int htb_drop(struct Qdisc* sch) /* reset all classes */ /* always caled under BH & queue lock */ -static void htb_reset(struct Qdisc* sch) +static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); int i; - HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle); for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *p; - list_for_each (p,q->hash+i) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + struct hlist_node *p; + struct htb_class *cl; + + hlist_for_each_entry(cl, p, q->hash + i, hlist) { if (cl->level) - memset(&cl->un.inner,0,sizeof(cl->un.inner)); + memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { - if (cl->un.leaf.q) + if (cl->un.leaf.q) qdisc_reset(cl->un.leaf.q); INIT_LIST_HEAD(&cl->un.leaf.drop_list); } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; -#ifdef HTB_DEBUG - cl->pq_node.rb_color = -1; - memset(cl->node,255,sizeof(cl->node)); -#endif } } @@ -1225,12 +1077,12 @@ static void htb_reset(struct Qdisc* sch) del_timer(&q->timer); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row,0,sizeof(q->row)); - memset(q->row_mask,0,sizeof(q->row_mask)); - memset(q->wait_pq,0,sizeof(q->wait_pq)); - memset(q->ptr,0,sizeof(q->ptr)); + memset(q->row, 0, sizeof(q->row)); + memset(q->row_mask, 0, sizeof(q->row_mask)); + memset(q->wait_pq, 0, sizeof(q->wait_pq)); + memset(q->ptr, 0, sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops+i); + INIT_LIST_HEAD(q->drops + i); } static int htb_init(struct Qdisc *sch, struct rtattr *opt) @@ -1239,36 +1091,31 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) struct rtattr *tb[TCA_HTB_INIT]; struct tc_htb_glob *gopt; int i; -#ifdef HTB_DEBUG - printk(KERN_INFO "HTB init, kernel part version %d.%d\n", - HTB_VER >> 16,HTB_VER & 0xffff); -#endif if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || - tb[TCA_HTB_INIT-1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { + tb[TCA_HTB_INIT - 1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); return -EINVAL; } - gopt = RTA_DATA(tb[TCA_HTB_INIT-1]); + gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); if (gopt->version != HTB_VER >> 16) { - printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); + printk(KERN_ERR + "HTB: need tc/htb version %d (minor is %d), you have %d\n", + HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); return -EINVAL; } - q->debug = gopt->debug; - HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); INIT_LIST_HEAD(&q->root); for (i = 0; i < HTB_HSIZE; i++) - INIT_LIST_HEAD(q->hash+i); + INIT_HLIST_HEAD(q->hash + i); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops+i); + INIT_LIST_HEAD(q->drops + i); init_timer(&q->timer); skb_queue_head_init(&q->direct_queue); q->direct_qlen = sch->dev->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; q->timer.function = htb_timer; q->timer.data = (unsigned long)sch; @@ -1290,80 +1137,72 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { struct htb_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_glob gopt; - HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle); - HTB_QLOCK(sch); + spin_lock_bh(&sch->dev->queue_lock); gopt.direct_pkts = q->direct_pkts; -#ifdef HTB_DEBUG - if (HTB_DBG_COND(0,2)) - htb_debug_dump(q); -#endif gopt.version = HTB_VER; gopt.rate2quantum = q->rate2quantum; gopt.defcls = q->defcls; - gopt.debug = q->debug; - rta = (struct rtattr*)b; + gopt.debug = 0; + rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); rta->rta_len = skb->tail - b; - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); skb_trim(skb, skb->tail - skb->data); return -1; } static int htb_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif - struct htb_class *cl = (struct htb_class*)arg; - unsigned char *b = skb->tail; + struct htb_class *cl = (struct htb_class *)arg; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_opt opt; - HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid); - - HTB_QLOCK(sch); + spin_lock_bh(&sch->dev->queue_lock); tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; tcm->tcm_handle = cl->classid; if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; - rta = (struct rtattr*)b; + rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - memset (&opt,0,sizeof(opt)); + memset(&opt, 0, sizeof(opt)); - opt.rate = cl->rate->rate; opt.buffer = cl->buffer; - opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; - opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio; - opt.level = cl->level; + opt.rate = cl->rate->rate; + opt.buffer = cl->buffer; + opt.ceil = cl->ceil->rate; + opt.cbuffer = cl->cbuffer; + opt.quantum = cl->un.leaf.quantum; + opt.prio = cl->un.leaf.prio; + opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); rta->rta_len = skb->tail - b; - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); skb_trim(skb, b - skb->data); return -1; } static int -htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) +htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; #ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE); + cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); + cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); #endif if (!cl->level && cl->un.leaf.q) @@ -1380,21 +1219,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) == NULL) - return -ENOBUFS; + if (new == NULL && (new = qdisc_create_dflt(sch->dev, + &pfifo_qdisc_ops)) + == NULL) + return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { if (cl->prio_activity) - htb_deactivate (qdisc_priv(sch),cl); + htb_deactivate(qdisc_priv(sch), cl); /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + sch->q.qlen -= (*old)->q.qlen; qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1403,20 +1243,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -ENOENT; } -static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) +static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; return (cl && !cl->level) ? cl->un.leaf.q : NULL; } static unsigned long htb_get(struct Qdisc *sch, u32 classid) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif - struct htb_class *cl = htb_find(classid,sch); - HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0); - if (cl) + struct htb_class *cl = htb_find(classid, sch); + if (cl) cl->refcnt++; return (unsigned long)cl; } @@ -1431,10 +1267,9 @@ static void htb_destroy_filters(struct tcf_proto **fl) } } -static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) +static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); - HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); if (!cl->level) { BUG_TRAP(cl->un.leaf.q); sch->q.qlen -= cl->un.leaf.q->q.qlen; @@ -1442,45 +1277,45 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) } qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - - htb_destroy_filters (&cl->filter_list); - - while (!list_empty(&cl->children)) - htb_destroy_class (sch,list_entry(cl->children.next, - struct htb_class,sibling)); + + htb_destroy_filters(&cl->filter_list); + + while (!list_empty(&cl->children)) + htb_destroy_class(sch, list_entry(cl->children.next, + struct htb_class, sibling)); /* note: this delete may happen twice (see htb_delete) */ - list_del(&cl->hlist); + if (!hlist_unhashed(&cl->hlist)) + hlist_del(&cl->hlist); list_del(&cl->sibling); - + if (cl->prio_activity) - htb_deactivate (q,cl); - + htb_deactivate(q, cl); + if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); - + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + kfree(cl); } /* always caled under BH & queue lock */ -static void htb_destroy(struct Qdisc* sch) +static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - HTB_DBG(0,1,"htb_destroy q=%p\n",q); - del_timer_sync (&q->timer); + del_timer_sync(&q->timer); #ifdef HTB_RATECM - del_timer_sync (&q->rttim); + del_timer_sync(&q->rttim); #endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ htb_destroy_filters(&q->filter_list); - - while (!list_empty(&q->root)) - htb_destroy_class (sch,list_entry(q->root.next, - struct htb_class,sibling)); + + while (!list_empty(&q->root)) + htb_destroy_class(sch, list_entry(q->root.next, + struct htb_class, sibling)); __skb_queue_purge(&q->direct_queue); } @@ -1488,24 +1323,25 @@ static void htb_destroy(struct Qdisc* sch) static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class*)arg; - HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); + struct htb_class *cl = (struct htb_class *)arg; // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? if (!list_empty(&cl->children) || cl->filter_cnt) return -EBUSY; - + sch_tree_lock(sch); - + /* delete from hash and active; remainder in destroy_class */ - list_del_init(&cl->hlist); + if (!hlist_unhashed(&cl->hlist)) + hlist_del(&cl->hlist); + if (cl->prio_activity) - htb_deactivate (q,cl); + htb_deactivate(q, cl); if (--cl->refcnt == 0) - htb_destroy_class(sch,cl); + htb_destroy_class(sch, cl); sch_tree_unlock(sch); return 0; @@ -1513,45 +1349,46 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) static void htb_put(struct Qdisc *sch, unsigned long arg) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif - struct htb_class *cl = (struct htb_class*)arg; - HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); + struct htb_class *cl = (struct htb_class *)arg; if (--cl->refcnt == 0) - htb_destroy_class(sch,cl); + htb_destroy_class(sch, cl); } -static int htb_change_class(struct Qdisc *sch, u32 classid, - u32 parentid, struct rtattr **tca, unsigned long *arg) +static int htb_change_class(struct Qdisc *sch, u32 classid, + u32 parentid, struct rtattr **tca, + unsigned long *arg) { int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class*)*arg,*parent; - struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct rtattr *opt = tca[TCA_OPTIONS - 1]; struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct rtattr *tb[TCA_HTB_RTAB]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || - tb[TCA_HTB_PARMS-1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt)) + tb[TCA_HTB_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) goto failure; - - parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch); - hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); - HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); - if (!rtab || !ctab) goto failure; + parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); + + hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); + + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); + if (!rtab || !ctab) + goto failure; - if (!cl) { /* new class */ + if (!cl) { /* new class */ struct Qdisc *new_q; + int prio; + /* check for valid classid */ - if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) + if (!classid || TC_H_MAJ(classid ^ sch->handle) + || htb_find(classid, sch)) goto failure; /* check maximal depth */ @@ -1560,18 +1397,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; } err = -ENOBUFS; - if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL) + if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - - memset(cl, 0, sizeof(*cl)); + cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); - INIT_LIST_HEAD(&cl->hlist); + INIT_HLIST_NODE(&cl->hlist); INIT_LIST_HEAD(&cl->children); INIT_LIST_HEAD(&cl->un.leaf.drop_list); -#ifdef HTB_DEBUG - cl->magic = HTB_CMAGIC; -#endif + RB_CLEAR_NODE(&cl->pq_node); + + for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) + RB_CLEAR_NODE(&cl->node[prio]); /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock @@ -1581,53 +1418,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent && !parent->level) { /* turn parent into inner node */ sch->q.qlen -= parent->un.leaf.q->q.qlen; - qdisc_destroy (parent->un.leaf.q); - if (parent->prio_activity) - htb_deactivate (q,parent); + qdisc_destroy(parent->un.leaf.q); + if (parent->prio_activity) + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/); + htb_safe_rb_erase(&parent->pq_node, q->wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level - : TC_HTB_MAXDEPTH) - 1; - memset (&parent->un.inner,0,sizeof(parent->un.inner)); + : TC_HTB_MAXDEPTH) - 1; + memset(&parent->un.inner, 0, sizeof(parent->un.inner)); } /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; cl->parent = parent; + cl->classid = classid; + cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ cl->tokens = hopt->buffer; cl->ctokens = hopt->cbuffer; - cl->mbuffer = 60000000; /* 1min */ + cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ PSCHED_GET_TIME(cl->t_c); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); - list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); -#ifdef HTB_DEBUG - { - int i; - for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1; - cl->pq_node.rb_color = -1; - } -#endif - } else sch_tree_lock(sch); + hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); + list_add_tail(&cl->sibling, + parent ? &parent->children : &q->root); + } else + sch_tree_lock(sch); /* it used to be a nasty bug here, we have to check that node - is really leaf before changing cl->un.leaf ! */ + is really leaf before changing cl->un.leaf ! */ if (!cl->level) { cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->un.leaf.quantum < 1000) { - printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid); + printk(KERN_WARNING + "HTB: quantum of class %X is small. Consider r2q change.\n", + cl->classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { - printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid); + printk(KERN_WARNING + "HTB: quantum of class %X is big. Consider r2q change.\n", + cl->classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1638,16 +1475,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->buffer = hopt->buffer; cl->cbuffer = hopt->cbuffer; - if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab; - if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab; + if (cl->rate) + qdisc_put_rtab(cl->rate); + cl->rate = rtab; + if (cl->ceil) + qdisc_put_rtab(cl->ceil); + cl->ceil = ctab; sch_tree_unlock(sch); *arg = (unsigned long)cl; return 0; failure: - if (rtab) qdisc_put_rtab(rtab); - if (ctab) qdisc_put_rtab(ctab); + if (rtab) + qdisc_put_rtab(rtab); + if (ctab) + qdisc_put_rtab(ctab); return err; } @@ -1656,28 +1499,28 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; - HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl); + return fl; } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) + u32 classid) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_find (classid,sch); - HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt); + struct htb_class *cl = htb_find(classid, sch); + /*if (cl && !cl->level) return 0; - The line above used to be there to prevent attaching filters to - leaves. But at least tc_index filter uses this just to get class - for other reasons so that we have to allow for it. - ---- - 19.6.2002 As Werner explained it is ok - bind filter is just - another way to "lock" the class - unlike "get" this lock can - be broken by class during destroy IIUC. + The line above used to be there to prevent attaching filters to + leaves. But at least tc_index filter uses this just to get class + for other reasons so that we have to allow for it. + ---- + 19.6.2002 As Werner explained it is ok - bind filter is just + another way to "lock" the class - unlike "get" this lock can + be broken by class during destroy IIUC. */ - if (cl) - cl->filter_cnt++; - else + if (cl) + cl->filter_cnt++; + else q->filter_cnt++; return (unsigned long)cl; } @@ -1686,10 +1529,10 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt); - if (cl) - cl->filter_cnt--; - else + + if (cl) + cl->filter_cnt--; + else q->filter_cnt--; } @@ -1702,9 +1545,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *p; - list_for_each (p,q->hash+i) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + struct hlist_node *p; + struct htb_class *cl; + + hlist_for_each_entry(cl, p, q->hash + i, hlist) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1752,12 +1596,13 @@ static struct Qdisc_ops htb_qdisc_ops = { static int __init htb_module_init(void) { - return register_qdisc(&htb_qdisc_ops); + return register_qdisc(&htb_qdisc_ops); } -static void __exit htb_module_exit(void) +static void __exit htb_module_exit(void) { - unregister_qdisc(&htb_qdisc_ops); + unregister_qdisc(&htb_qdisc_ops); } + module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 8edc32a6ad2f..c3242f727d41 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -7,7 +7,6 @@ * Authors: Jamal Hadi Salim 1999 */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/skbuff.h> diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5a4a4d0ae502..45939bafbdf8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -13,7 +13,6 @@ * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/types.h> @@ -149,7 +148,8 @@ static long tabledist(unsigned long mu, long sigma, static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; + /* We don't fill cb now as skb_unshare() may invalidate it */ + struct netem_skb_cb *cb; struct sk_buff *skb2; int ret; int count = 1; @@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_HW - && skb_checksum_help(skb, 0))) { + || (skb->ip_summed == CHECKSUM_PARTIAL + && skb_checksum_help(skb))) { sch->qstats.drops++; return NET_XMIT_DROP; } @@ -201,6 +201,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } + cb = (struct netem_skb_cb *)skb->cb; if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3395ca7bcadf..a5fa03c0c19b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -11,7 +11,6 @@ * Init -- EINVAL when opt undefined */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 2be563cba72b..d65cadddea69 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -14,7 +14,6 @@ * J Hadi Salim 980816: ECN support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e057768f68b4..d0d6e595a78c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -9,7 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index d8e03c74ca76..d9a5d298d755 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -12,7 +12,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9d05e13e92f6..27329ce9c311 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -441,7 +441,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, /* If the primary path is changing, assume that the * user wants to use this new path. */ - if (transport->state != SCTP_INACTIVE) + if ((transport->state == SCTP_ACTIVE) || + (transport->state == SCTP_UNKNOWN)) asoc->peer.active_path = transport; /* @@ -532,11 +533,11 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, port = addr->v4.sin_port; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", - " port: %d state:%s\n", + " port: %d state:%d\n", asoc, addr, addr->v4.sin_port, - peer_state == SCTP_UNKNOWN?"UNKNOWN":"ACTIVE"); + peer_state); /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) @@ -545,9 +546,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { - if (peer_state == SCTP_ACTIVE && - peer->state == SCTP_UNKNOWN) - peer->state = SCTP_ACTIVE; + if (peer->state == SCTP_UNKNOWN) { + if (peer_state == SCTP_ACTIVE) + peer->state = SCTP_ACTIVE; + if (peer_state == SCTP_UNCONFIRMED) + peer->state = SCTP_UNCONFIRMED; + } return peer; } @@ -739,7 +743,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (t->state == SCTP_INACTIVE) + if ((t->state == SCTP_INACTIVE) || + (t->state == SCTP_UNCONFIRMED)) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; @@ -759,7 +764,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, * [If the primary is active but not most recent, bump the most * recently used transport.] */ - if (asoc->peer.primary_path->state != SCTP_INACTIVE && + if (((asoc->peer.primary_path->state == SCTP_ACTIVE) || + (asoc->peer.primary_path->state == SCTP_UNKNOWN)) && first != asoc->peer.primary_path) { second = first; first = asoc->peer.primary_path; @@ -1054,7 +1060,7 @@ void sctp_assoc_update(struct sctp_association *asoc, transports); if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) sctp_assoc_add_peer(asoc, &trans->ipaddr, - GFP_ATOMIC, SCTP_ACTIVE); + GFP_ATOMIC, trans->state); } asoc->ctsn_ack_point = asoc->next_tsn - 1; @@ -1094,7 +1100,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Try to find an active transport. */ - if (t->state != SCTP_INACTIVE) { + if ((t->state == SCTP_ACTIVE) || + (t->state == SCTP_UNKNOWN)) { break; } else { /* Keep track of the next transport in case diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 2b962627f631..2b9c12a170e5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - gfp_t gfp) + __u8 use_as_src, gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -163,6 +163,8 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, if (!addr->a.v4.sin_port) addr->a.v4.sin_port = bp->port; + addr->use_as_src = use_as_src; + INIT_LIST_HEAD(&addr->list); list_add_tail(&addr->list, &bp->address_list); SCTP_DBG_OBJCNT_INC(addr); @@ -274,7 +276,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, port, 0); - retval = sctp_add_bind_addr(bp, &addr, gfp); + retval = sctp_add_bind_addr(bp, &addr, 1, gfp); if (retval) { /* Can't finish building the list, clean up. */ sctp_bind_addr_clean(bp); @@ -367,7 +369,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && (flags & SCTP_ADDR6_PEERSUPP)))) - error = sctp_add_bind_addr(dest, addr, gfp); + error = sctp_add_bind_addr(dest, addr, 1, gfp); } return error; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 67bd53070ee0..35c49ff2d062 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -158,6 +158,12 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep, void sctp_endpoint_free(struct sctp_endpoint *ep) { ep->base.dead = 1; + + ep->base.sk->sk_state = SCTP_SS_CLOSED; + + /* Unlink this endpoint, so we can't find it again! */ + sctp_unhash_endpoint(ep); + sctp_endpoint_put(ep); } @@ -166,13 +172,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); - ep->base.sk->sk_state = SCTP_SS_CLOSED; - - /* Unlink this endpoint, so we can't find it again! */ - sctp_unhash_endpoint(ep); - /* Free up the HMAC transform. */ - sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); + crypto_free_hash(sctp_sk(ep->base.sk)->hmac); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/input.c b/net/sctp/input.c index 42b66e74bbb5..64f630102532 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -218,17 +218,11 @@ int sctp_rcv(struct sk_buff *skb) } } - /* SCTP seems to always need a timestamp right now (FIXME) */ - if (skb->tstamp.off_sec == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); - } - if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb, 1)) + if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ @@ -255,10 +249,13 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk)) { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); - else + } else { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); + } sctp_bh_unlock_sock(sk); @@ -271,6 +268,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -384,7 +382,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, * pmtu discovery on this transport. */ t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - t->param_flags = (t->param_flags & ~SPP_HB) | + t->param_flags = (t->param_flags & ~SPP_PMTUD) | SPP_PMTUD_DISABLE; } else { t->pathmtu = pmtu; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf0c767d43ae..cf6deed7e849 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ -void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) +void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ @@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - list_add_tail(&packet->list, &q->in_chunk_list); + list_add_tail(&chunk->list, &q->in_chunk_list); q->immediate.func(q->immediate.data); } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8ef08070c8b6..249e5033c1a8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,7 +78,6 @@ #include <asm/uaccess.h> -extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *); static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inetaddr_event, }; @@ -290,7 +289,8 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, sctp_read_lock(addr_lock); list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if ((laddr->a.sa.sa_family == AF_INET6) && + if ((laddr->use_as_src) && + (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (!baddr || (matchlen < bmatchlen)) { @@ -321,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((in6_dev = __in6_dev_get(dev)) == NULL) { - read_unlock(&addrconf_lock); + rcu_read_unlock(); return; } @@ -342,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_unlock(&in6_dev->lock); - read_unlock(&addrconf_lock); + rcu_read_unlock(); } /* Initialize a sockaddr_storage from in incoming skb. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index cdc5a3936766..3ef4351dd956 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -633,7 +633,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->pathmtu - packet->overhead) { + if (len < asoc->frag_point) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } @@ -645,7 +645,13 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. */ + /* Update our view of the receiver's rwnd. Include sk_buff overhead + * while updating peer.rwnd so that it reduces the chances of a + * receiver running out of receive buffer space even when receive + * window is still open. This can happen when a sender is sending + * sending small messages. + */ + datasize += sizeof(struct sk_buff); if (datasize < rwnd) rwnd -= datasize; else diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e5faa351aaad..739582415bf6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -416,7 +416,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += sctp_data_size(chunk); + q->asoc->peer.rwnd += (sctp_data_size(chunk) + + sizeof(struct sk_buff)); q->outstanding_bytes -= sctp_data_size(chunk); transport->flight_size -= sctp_data_size(chunk); @@ -467,6 +468,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, switch(reason) { case SCTP_RTXR_T3_RTX: + SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -475,12 +477,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_assoc_update_retran_path(transport->asoc); break; case SCTP_RTXR_FAST_RTX: + SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: - default: + SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); break; + default: + BUG(); } sctp_retransmit_mark(q, transport, fast_retransmit); @@ -691,7 +696,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) { new_transport = asoc->peer.active_path; - } else if (new_transport->state == SCTP_INACTIVE) { + } else if ((new_transport->state == SCTP_INACTIVE) || + (new_transport->state == SCTP_UNCONFIRMED)) { /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. @@ -848,7 +854,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) */ new_transport = chunk->transport; if (!new_transport || - new_transport->state == SCTP_INACTIVE) + ((new_transport->state == SCTP_INACTIVE) || + (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; /* Change packets if necessary. */ @@ -1464,7 +1471,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Mark the destination transport address as * active if it is not so marked. */ - if (transport->state == SCTP_INACTIVE) { + if ((transport->state == SCTP_INACTIVE) || + (transport->state == SCTP_UNCONFIRMED)) { sctp_assoc_control_transport( transport->asoc, transport, diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5b3b0e0ae7e5..a356d8d310a9 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), + SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), + SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), + SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), + SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), + SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), + SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), + SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), + SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), + SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), + SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), + SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), + SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), + SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), + SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), + SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; @@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, - (sk->sk_rcvbuf - assoc->rwnd), assoc->sndbuf_used, + (sk->sk_rcvbuf - assoc->rwnd), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 816c033d7886..fac7674438a4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -61,7 +61,7 @@ #include <net/inet_ecn.h> /* Global data structures. */ -struct sctp_globals sctp_globals; +struct sctp_globals sctp_globals __read_mostly; struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; @@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep __read_mostly; kmem_cache_t *sctp_bucket_cachep __read_mostly; -extern int sctp_snmp_proc_init(void); -extern int sctp_snmp_proc_exit(void); -extern int sctp_eps_proc_init(void); -extern int sctp_eps_proc_exit(void); -extern int sctp_assocs_proc_init(void); -extern int sctp_assocs_proc_exit(void); - /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { @@ -240,7 +233,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (((AF_INET6 == addr->a.sa.sa_family) && (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { - error = sctp_add_bind_addr(bp, &addr->a, + error = sctp_add_bind_addr(bp, &addr->a, 1, GFP_ATOMIC); if (error) goto end_copy; @@ -486,6 +479,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (!laddr->use_as_src) + continue; sctp_v4_dst_saddr(&dst_saddr, dst, bp->port); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; @@ -506,7 +501,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (AF_INET == laddr->a.sa.sa_family) { + if ((laddr->use_as_src) && + (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; if (!ip_route_output_key(&rt, &fl)) { dst = &rt->u.dst; @@ -1046,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_rto_beta = SCTP_RTO_BETA; /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = 60 * HZ; + sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; /* Whether Cookie Preservative is enabled(1) or not(0) */ sctp_cookie_preserve_enable = 1; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2a8773691695..507dff72c585 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -806,38 +806,26 @@ no_mem: /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const struct msghdr *msg) + const struct msghdr *msg, + size_t paylen) { struct sctp_chunk *retval; - void *payload = NULL, *payoff; - size_t paylen = 0; - struct iovec *iov = NULL; - int iovlen = 0; - - if (msg) { - iov = msg->msg_iov; - iovlen = msg->msg_iovlen; - paylen = get_user_iov_size(iov, iovlen); - } + void *payload = NULL; + int err; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); if (!retval) goto err_chunk; if (paylen) { /* Put the msg_iov together into payload. */ - payload = kmalloc(paylen, GFP_ATOMIC); + payload = kmalloc(paylen, GFP_KERNEL); if (!payload) goto err_payload; - payoff = payload; - for (; iovlen > 0; --iovlen) { - if (copy_from_user(payoff, iov->iov_base,iov->iov_len)) - goto err_copy; - payoff += iov->iov_len; - iov++; - } + err = memcpy_fromiovec(payload, msg->msg_iov, paylen); + if (err < 0) + goto err_copy; } sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen); @@ -1294,10 +1282,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, retval = kmalloc(*cookie_len, GFP_ATOMIC); - if (!retval) { - *cookie_len = 0; + if (!retval) goto nodata; - } /* Clear this memory since we are sending this data structure * out on the network. @@ -1333,19 +1319,29 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); if (sctp_sk(ep->base.sk)->hmac) { + struct hash_desc desc; + /* Sign the message. */ sg.page = virt_to_page(&cookie->c); sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; sg.length = bodysize; keylen = SCTP_SECRET_SIZE; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, cookie->signature); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) + goto free_cookie; } -nodata: return retval; + +free_cookie: + kfree(retval); +nodata: + *cookie_len = 0; + return NULL; } /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ @@ -1366,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; + struct hash_desc desc; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1401,17 +1398,25 @@ struct sctp_association *sctp_unpack_cookie( sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, - 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Try the previous key. */ key = (char *)ep->secret_key[ep->last_key]; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Yikes! Still bad signature! */ @@ -1442,8 +1447,16 @@ no_hmac: /* Check to see if the cookie is stale. If there is already * an association, there is no need to check cookie's expiration * for init collision case of lost COOKIE ACK. + * If skb has been timestamped, then use the stamp, otherwise + * use current time. This introduces a small possibility that + * that a cookie may be considered expired, but his would only slow + * down the new association establishment instead of every packet. */ - skb_get_timestamp(skb, &tv); + if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) + skb_get_timestamp(skb, &tv); + else + do_gettimeofday(&tv); + if (!asoc && tv_lt(bear_cookie->expiration, tv)) { __u16 len; /* @@ -1493,7 +1506,7 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { - sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, + sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, GFP_ATOMIC); } @@ -2017,7 +2030,7 @@ static int sctp_process_param(struct sctp_association *asoc, af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); scope = sctp_scope(peer_addr); if (sctp_in_scope(&addr, scope)) - if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE)) + if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; @@ -2418,7 +2431,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc, * Due to Resource Shortage'. */ - peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE); + peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_UNCONFIRMED); if (!peer) return SCTP_ERROR_RSRC_LOW; @@ -2565,6 +2578,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, union sctp_addr_param *addr_param; struct list_head *pos; struct sctp_transport *transport; + struct sctp_sockaddr_entry *saddr; int retval = 0; addr_param = (union sctp_addr_param *) @@ -2578,7 +2592,11 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, case SCTP_PARAM_ADD_IP: sctp_local_bh_disable(); sctp_write_lock(&asoc->base.addr_lock); - retval = sctp_add_bind_addr(bp, &addr, GFP_ATOMIC); + list_for_each(pos, &bp->address_list) { + saddr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (sctp_cmp_addr_exact(&saddr->a, &addr)) + saddr->use_as_src = 1; + } sctp_write_unlock(&asoc->base.addr_lock); sctp_local_bh_enable(); break; @@ -2591,6 +2609,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); + dst_release(transport->dst); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c5beb2ad7ef7..9c10bdec1afe 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -430,7 +430,11 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ - asoc->overall_error_count++; + /* When probing UNCONFIRMED addresses, the association overall + * error count is NOT incremented + */ + if (transport->state != SCTP_UNCONFIRMED) + asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE && (transport->error_count++ >= transport->pathmaxrxt)) { @@ -610,7 +614,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Mark the destination transport address as active if it is not so * marked. */ - if (t->state == SCTP_INACTIVE) + if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); @@ -620,6 +624,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, */ hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); + + /* Update the heartbeat timer. */ + if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) + sctp_transport_hold(t); } /* Helper function to do a transport reset at the expiry of the hearbeat diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9e58144f4851..1c42fe983a5b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 0, 0, 0, GFP_ATOMIC); - if (!ev) - goto nomem; - - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + if (ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint * will verify that it is in SHUTDOWN-ACK-SENT state, if it is @@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* @@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, GFP_ATOMIC)) goto nomem_init; - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, @@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sizeof(sctp_chunkhdr_t); if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) - goto nomem_ack; + goto nomem_init; repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) - goto nomem_ack; + goto nomem_init; /* If there are errors need to be reported for unknown parameters, * include them in the outgoing INIT ACK as "Unrecognized parameter" @@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sctp_chunk_free(err_chunk); } + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* @@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DELETE_TCB; -nomem_ack: - if (err_chunk) - sctp_chunk_free(err_chunk); nomem_init: sctp_association_free(new_asoc); nomem: + if (err_chunk) + sctp_chunk_free(err_chunk); return SCTP_DISPOSITION_NOMEM; } @@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, struct sctp_association *new_asoc; sctp_init_chunk_t *peer_init; struct sctp_chunk *repl; - struct sctp_ulpevent *ev; + struct sctp_ulpevent *ev, *ai_ev = NULL; int error = 0; struct sctp_chunk *err_chk_p; @@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, }; } - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, - SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); - sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - - if (new_asoc->autoclose) - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - /* Re-build the bind address for the association is done in + /* Delay state machine commands until later. + * + * Re-build the bind address for the association is done in * the sctp_unpack_cookie() already. */ /* This is a brand-new association, so these are not yet side @@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) - goto nomem_repl; - - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + goto nomem_init; /* RFC 2960 5.1 Normal Establishment of an Association * @@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaption layer. */ if (new_asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(new_asoc, + ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc, GFP_ATOMIC); - if (!ev) - goto nomem_ev; + if (!ai_ev) + goto nomem_aiev; + } + + /* Add all the state machine commands now since we've created + * everything. This way we don't introduce memory corruptions + * during side-effect processing and correclty count established + * associations. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); + + if (new_asoc->autoclose) + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); + + /* This will send the COOKIE ACK */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + + /* Queue the ASSOC_CHANGE event */ + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + + /* Send up the Adaptation Layer Indication event */ + if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - } + SCTP_ULPEVENT(ai_ev)); return SCTP_DISPOSITION_CONSUME; +nomem_aiev: + sctp_ulpevent_free(ev); nomem_ev: sctp_chunk_free(repl); -nomem_repl: nomem_init: sctp_association_free(new_asoc); nomem: @@ -846,6 +854,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t)); hbinfo.daddr = transport->ipaddr; hbinfo.sent_at = jiffies; + hbinfo.hb_nonce = transport->hb_nonce; /* Send a heartbeat to our peer. */ paylen = sizeof(sctp_sender_hb_info_t); @@ -1048,6 +1057,10 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } + /* Validate the 64-bit random nonce. */ + if (hbinfo->hb_nonce != link->hb_nonce) + return SCTP_DISPOSITION_DISCARD; + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ @@ -1355,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, sctp_source(chunk), (sctp_init_chunk_t *)chunk->chunk_hdr, - GFP_ATOMIC)) { - retval = SCTP_DISPOSITION_NOMEM; - goto nomem_init; - } + GFP_ATOMIC)) + goto nomem; /* Make sure no new addresses are being added during the * restart. Do not do this check for COOKIE-WAIT state, @@ -1369,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { retval = SCTP_DISPOSITION_CONSUME; - goto cleanup_asoc; + goto nomem_retval; } } @@ -1425,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); retval = SCTP_DISPOSITION_CONSUME; + return retval; + +nomem: + retval = SCTP_DISPOSITION_NOMEM; +nomem_retval: + if (new_asoc) + sctp_association_free(new_asoc); cleanup: if (err_chunk) sctp_chunk_free(err_chunk); return retval; -nomem: - retval = SCTP_DISPOSITION_NOMEM; - goto cleanup; -nomem_init: -cleanup_asoc: - sctp_association_free(new_asoc); - goto cleanup; } /* @@ -1606,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, */ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); - /* Update the content of current association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - /* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, new_asoc->c.sinit_num_ostreams, @@ -1623,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); return SCTP_DISPOSITION_CONSUME; @@ -1746,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL; + struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -1773,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, * SCTP user upon reception of a valid COOKIE * ECHO chunk. */ - ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, + ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, GFP_ATOMIC); if (!ev) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter, * SCTP delivers this notification to inform the application * that of the peers requested adaption layer. */ - if (new_asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(new_asoc, + if (asoc->peer.adaption_ind) { + ai_ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC); - if (!ev) + if (!ai_ev) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); } } sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); @@ -1804,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, if (!repl) goto nomem; + if (ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + if (ai_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ai_ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; nomem: + if (ai_ev) + sctp_ulpevent_free(ai_ev); if (ev) sctp_ulpevent_free(ev); return SCTP_DISPOSITION_NOMEM; @@ -2658,9 +2672,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3012,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); - /* 10.2 H) SHUTDOWN COMPLETE notification * * When SCTP completes the shutdown procedures (section 9.2) this @@ -3023,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!ev) goto nomem; + /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ + reply = sctp_make_shutdown_complete(asoc, chunk); + if (!reply) + goto nomem_chunk; + + /* Do all the commands now (after allocation), so that we + * have consistent state if memory allocation failes + */ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall @@ -3034,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ - reply = sctp_make_shutdown_complete(asoc, chunk); - if (!reply) - goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); @@ -3049,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; +nomem_chunk: + sctp_ulpevent_free(ev); nomem: return SCTP_DISPOSITION_NOMEM; } @@ -3647,6 +3667,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4026,18 +4047,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( * from its upper layer, but retransmits data to the far end * if necessary to fill gaps. */ - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer. */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4161,8 +4176,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( void *arg, sctp_cmd_seq_t *commands) { - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; /* Stop T1-init timer */ @@ -4170,12 +4184,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); @@ -4555,6 +4564,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; + SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4623,6 +4634,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -4657,6 +4669,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -4716,6 +4729,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -4760,6 +4774,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4821,6 +4837,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in * RFC2960 [5] section 8.1 and 8.2. @@ -4887,6 +4905,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -4917,6 +4936,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; + SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper * layer, the endpoint enters SHUTDOWN-PENDING state and @@ -5278,7 +5299,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, datalen -= sizeof(sctp_data_chunk_t); deliver = SCTP_CMD_CHUNK_ULP; - chunk->data_accepted = 1; /* Think about partial delivery. */ if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) { @@ -5357,6 +5377,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, if (SCTP_CMD_CHUNK_ULP == deliver) sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); + chunk->data_accepted = 1; + /* Note: Some chunks may get overcounted (if we drop) or overcounted * if we renege and the chunk arrives again. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b811691c35bf..3fe906d65069 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,7 +57,6 @@ * be incorporated into the next SCTP release. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> @@ -370,7 +369,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Use GFP_ATOMIC since BHs are disabled. */ addr->v4.sin_port = ntohs(addr->v4.sin_port); - ret = sctp_add_bind_addr(bp, addr, GFP_ATOMIC); + ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); addr->v4.sin_port = htons(addr->v4.sin_port); sctp_write_unlock(&ep->base.addr_lock); sctp_local_bh_enable(); @@ -492,6 +491,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sctp_chunk *chunk; struct sctp_sockaddr_entry *laddr; union sctp_addr *addr; + union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; struct list_head *pos; @@ -559,14 +559,26 @@ static int sctp_send_asconf_add_ip(struct sock *sk, } retval = sctp_send_asconf(asoc, chunk); + if (retval) + goto out; - /* FIXME: After sending the add address ASCONF chunk, we - * cannot append the address to the association's binding - * address list, because the new address may be used as the - * source of a message sent to the peer before the ASCONF - * chunk is received by the peer. So we should wait until - * ASCONF_ACK is received. + /* Add the new addresses to the bind address list with + * use_as_src set to 0. */ + sctp_local_bh_disable(); + sctp_write_lock(&asoc->base.addr_lock); + addr_buf = addrs; + for (i = 0; i < addrcnt; i++) { + addr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(addr->v4.sin_family); + memcpy(&saveaddr, addr, af->sockaddr_len); + saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); + retval = sctp_add_bind_addr(bp, &saveaddr, 0, + GFP_ATOMIC); + addr_buf += af->sockaddr_len; + } + sctp_write_unlock(&asoc->base.addr_lock); + sctp_local_bh_enable(); } out: @@ -677,12 +689,15 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; + struct sctp_transport *transport; struct sctp_bind_addr *bp; struct sctp_chunk *chunk; union sctp_addr *laddr; + union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; - struct list_head *pos; + struct list_head *pos, *pos1; + struct sctp_sockaddr_entry *saddr; int i; int retval = 0; @@ -749,14 +764,42 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto out; } - retval = sctp_send_asconf(asoc, chunk); + /* Reset use_as_src flag for the addresses in the bind address + * list that are to be deleted. + */ + sctp_local_bh_disable(); + sctp_write_lock(&asoc->base.addr_lock); + addr_buf = addrs; + for (i = 0; i < addrcnt; i++) { + laddr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(laddr->v4.sin_family); + memcpy(&saveaddr, laddr, af->sockaddr_len); + saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); + list_for_each(pos1, &bp->address_list) { + saddr = list_entry(pos1, + struct sctp_sockaddr_entry, + list); + if (sctp_cmp_addr_exact(&saddr->a, &saveaddr)) + saddr->use_as_src = 0; + } + addr_buf += af->sockaddr_len; + } + sctp_write_unlock(&asoc->base.addr_lock); + sctp_local_bh_enable(); - /* FIXME: After sending the delete address ASCONF chunk, we - * cannot remove the addresses from the association's bind - * address list, because there maybe some packet send to - * the delete addresses, so we should wait until ASCONF_ACK - * packet is received. + /* Update the route and saddr entries for all the transports + * as some of the addresses in the bind address list are + * about to be deleted and cannot be used as source addresses. */ + list_for_each(pos1, &asoc->peer.transport_addr_list) { + transport = list_entry(pos1, struct sctp_transport, + transports); + dst_release(transport->dst); + sctp_transport_route(transport, NULL, + sctp_sk(asoc->base.sk)); + } + + retval = sctp_send_asconf(asoc, chunk); } out: return retval; @@ -1246,9 +1289,13 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) } } - if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) - sctp_primitive_ABORT(asoc, NULL); - else + if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, NULL, 0); + if (chunk) + sctp_primitive_ABORT(asoc, chunk); + } else sctp_primitive_SHUTDOWN(asoc, NULL); } @@ -1477,8 +1524,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } if (sinfo_flags & SCTP_ABORT) { + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, msg, msg_len); + if (!chunk) { + err = -ENOMEM; + goto out_unlock; + } + SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, msg); + sctp_primitive_ABORT(asoc, chunk); err = 0; goto out_unlock; } @@ -2026,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. */ -int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, - struct sctp_transport *trans, - struct sctp_association *asoc, - struct sctp_sock *sp, - int hb_change, - int pmtud_change, - int sackdelay_change) +static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) { int error; @@ -2915,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) goto out; } - timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) @@ -2990,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); + sp->initmsg.sinit_max_init_timeo = sctp_rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); - sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); - sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); + sp->rtoinfo.srto_initial = sctp_rto_initial; + sp->rtoinfo.srto_max = sctp_rto_max; + sp->rtoinfo.srto_min = sctp_rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. @@ -3006,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = - jiffies_to_msecs(sctp_valid_cookie_life); + sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3017,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->hbinterval = sctp_hb_interval; sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); + sp->sackdelay = sctp_sack_timeout; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -3030,8 +3084,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) */ sp->disable_fragments = 0; - /* Turn on/off any Nagle-like algorithm. */ - sp->nodelay = 1; + /* Enable Nagle algorithm by default. */ + sp->nodelay = 0; /* Enable by default. */ sp->v4mapped = 1; @@ -4843,7 +4897,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) int sctp_inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; - struct crypto_tfm *tfm=NULL; + struct crypto_hash *tfm = NULL; int err = -EINVAL; if (unlikely(backlog < 0)) @@ -4856,7 +4910,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) /* Allocate HMAC for generating cookie. */ if (sctp_hmac_alg) { - tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0); + tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (!tfm) { err = -ENOSYS; goto out; @@ -4882,7 +4936,7 @@ out: sctp_release_sock(sk); return err; cleanup: - sctp_crypto_free_tfm(tfm); + crypto_free_hash(tfm); goto out; } @@ -4978,7 +5032,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( /* Caller must hold hashbucket lock for this tb with local BH disabled */ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) { - if (hlist_empty(&pp->owner)) { + if (pp && hlist_empty(&pp->owner)) { if (pp->next) pp->next->pprev = pp->pprev; *(pp->pprev) = pp->next; @@ -5564,6 +5618,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ + if (PF_INET6 == assoc->base.sk->sk_family) + flags = SCTP_ADDR6_ALLOWED; if (assoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (assoc->peer.ipv6_address) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dc6f3ff32358..633cd178654b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -45,9 +45,10 @@ #include <net/sctp/sctp.h> #include <linux/sysctl.h> -static ctl_handler sctp_sysctl_jiffies_ms; -static long rto_timer_min = 1; -static long rto_timer_max = 86400000; /* One day */ +static int zero = 0; +static int one = 1; +static int timer_max = 86400000; /* ms in one day */ +static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; @@ -56,45 +57,45 @@ static ctl_table sctp_table[] = { .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_MAX_BURST, @@ -102,7 +103,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, @@ -110,7 +114,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_SNDBUF_POLICY, @@ -118,7 +125,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -126,7 +134,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -134,7 +143,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, @@ -142,18 +154,21 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_PRESERVE_ENABLE, @@ -161,23 +176,26 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -185,7 +203,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -193,7 +212,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -201,8 +221,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void) { unregister_sysctl_table(sctp_sysctl_header); } - -/* Strategy function to convert jiffies to milliseconds. */ -static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) { - - if (oldval) { - size_t olen; - - if (oldlenp) { - if (get_user(olen, oldlenp)) - return -EFAULT; - - if (olen != sizeof (int)) - return -EINVAL; - } - if (put_user((*(int *)(table->data) * 1000) / HZ, - (int __user *)oldval) || - (oldlenp && put_user(sizeof (int), oldlenp))) - return -EFAULT; - } - if (newval && newlen) { - int new; - - if (newlen != sizeof (int)) - return -EINVAL; - - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - *(int *)(table->data) = (new * HZ) / 1000; - } - return 1; -} diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 160f62ad1cc5..3e5936a5f671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -49,6 +49,7 @@ */ #include <linux/types.h> +#include <linux/random.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> @@ -74,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rtt = 0; - peer->rto = sctp_rto_initial; + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; @@ -85,7 +86,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->init_sent_count = 0; - peer->state = SCTP_ACTIVE; peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -109,6 +109,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->hb_timer.function = sctp_generate_heartbeat_event; peer->hb_timer.data = (unsigned long)peer; + /* Initialize the 64-bit random nonce sent with heartbeat. */ + get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); + atomic_set(&peer->refcnt, 1); peer->dead = 0; @@ -517,7 +520,9 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; - timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); + timeout = t->rto + sctp_jitter(t->rto); + if (t->state != SCTP_UNCONFIRMED) + timeout += t->hbinterval; timeout += jiffies; return timeout; } diff --git a/net/socket.c b/net/socket.c index 565f5e8d1191..1bc4167e0da8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -42,7 +42,7 @@ * Andi Kleen : Some small cleanups, optimizations, * and fixed a copy_from_user() bug. * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) - * Tigran Aivazian : Made listen(2) backlog sanity checks + * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent * * @@ -53,18 +53,17 @@ * * * This module is effectively the top level interface to the BSD socket - * paradigm. + * paradigm. * * Based upon Swansea University Computer Society NET3.039 */ -#include <linux/config.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/socket.h> #include <linux/file.h> #include <linux/net.h> #include <linux/interrupt.h> +#include <linux/rcupdate.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -97,25 +96,24 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); + size_t size, loff_t pos); static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); -static int sock_mmap(struct file *file, struct vm_area_struct * vma); + size_t size, loff_t pos); +static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); static unsigned int sock_poll(struct file *file, struct poll_table_struct *wait); -static long sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); +static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); + unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_readv(struct file *file, const struct iovec *vector, unsigned long count, loff_t *ppos); static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); + unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); @@ -148,52 +146,8 @@ static struct file_operations socket_file_ops = { * The protocol list. Each protocol is registered in here. */ -static struct net_proto_family *net_families[NPROTO]; - -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) -static atomic_t net_family_lockct = ATOMIC_INIT(0); static DEFINE_SPINLOCK(net_family_lock); - -/* The strategy is: modifications net_family vector are short, do not - sleep and veeery rare, but read access should be free of any exclusive - locks. - */ - -static void net_family_write_lock(void) -{ - spin_lock(&net_family_lock); - while (atomic_read(&net_family_lockct) != 0) { - spin_unlock(&net_family_lock); - - yield(); - - spin_lock(&net_family_lock); - } -} - -static __inline__ void net_family_write_unlock(void) -{ - spin_unlock(&net_family_lock); -} - -static __inline__ void net_family_read_lock(void) -{ - atomic_inc(&net_family_lockct); - spin_unlock_wait(&net_family_lock); -} - -static __inline__ void net_family_read_unlock(void) -{ - atomic_dec(&net_family_lockct); -} - -#else -#define net_family_write_lock() do { } while(0) -#define net_family_write_unlock() do { } while(0) -#define net_family_read_lock() do { } while(0) -#define net_family_read_unlock() do { } while(0) -#endif - +static const struct net_proto_family *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -202,19 +156,20 @@ static __inline__ void net_family_read_unlock(void) static DEFINE_PER_CPU(int, sockets_in_use) = 0; /* - * Support routines. Move socket addresses back and forth across the kernel/user - * divide and look after the messy bits. + * Support routines. + * Move socket addresses back and forth across the kernel/user + * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, 24 for IPv6, - about 80 for AX.25 + about 80 for AX.25 must be at least one bigger than the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). + :unix_mkname()). */ - + /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -228,11 +183,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) { - if(ulen<0||ulen>MAX_SOCK_ADDR) + if (ulen < 0 || ulen > MAX_SOCK_ADDR) return -EINVAL; - if(ulen==0) + if (ulen == 0) return 0; - if(copy_from_user(kaddr,uaddr,ulen)) + if (copy_from_user(kaddr, uaddr, ulen)) return -EFAULT; return audit_sockaddr(ulen, kaddr); } @@ -253,51 +208,52 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) * length of the data is written over the length limit the user * specified. Zero is returned for a success. */ - -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen) + +int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, + int __user *ulen) { int err; int len; - if((err=get_user(len, ulen))) + err = get_user(len, ulen); + if (err) return err; - if(len>klen) - len=klen; - if(len<0 || len> MAX_SOCK_ADDR) + if (len > klen) + len = klen; + if (len < 0 || len > MAX_SOCK_ADDR) return -EINVAL; - if(len) - { + if (len) { if (audit_sockaddr(klen, kaddr)) return -ENOMEM; - if(copy_to_user(uaddr,kaddr,len)) + if (copy_to_user(uaddr, kaddr, len)) return -EFAULT; } /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g + * "fromlen shall refer to the value before truncation.." + * 1003.1g */ return __put_user(klen, ulen); } #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t * sock_inode_cachep __read_mostly; +static kmem_cache_t *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + + ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); - + ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; ei->socket.sk = NULL; ei->socket.file = NULL; - ei->socket.flags = 0; return &ei->vfs_inode; } @@ -308,22 +264,25 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { - struct socket_alloc *ei = (struct socket_alloc *) foo; + struct socket_alloc *ei = (struct socket_alloc *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) + == SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), - 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); + sizeof(struct socket_alloc), + 0, + (SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), + init_once, + NULL); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; @@ -336,7 +295,8 @@ static struct super_operations sockfs_ops = { }; static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, mnt); @@ -349,12 +309,13 @@ static struct file_system_type sock_fs_type = { .get_sb = sockfs_get_sb, .kill_sb = kill_anon_super, }; + static int sockfs_delete_dentry(struct dentry *dentry) { return 1; } static struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, + .d_delete = sockfs_delete_dentry, }; /* @@ -478,10 +439,12 @@ struct socket *sockfd_lookup(int fd, int *err) struct file *file; struct socket *sock; - if (!(file = fget(fd))) { + file = fget(fd); + if (!file) { *err = -EBADF; return NULL; } + sock = sock_from_file(file, err); if (!sock) fput(file); @@ -506,7 +469,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) /** * sock_alloc - allocate a socket - * + * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes * NULL is returned. @@ -514,8 +477,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) static struct socket *sock_alloc(void) { - struct inode * inode; - struct socket * sock; + struct inode *inode; + struct socket *sock; inode = new_inode(sock_mnt->mnt_sb); if (!inode) @@ -523,7 +486,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); - inode->i_mode = S_IFSOCK|S_IRWXUGO; + inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -537,7 +500,7 @@ static struct socket *sock_alloc(void) * a back door. Remember to keep it shut otherwise you'll let the * creepy crawlies in. */ - + static int sock_no_open(struct inode *irrelevant, struct file *dontcare) { return -ENXIO; @@ -554,9 +517,9 @@ const struct file_operations bad_sock_fops = { * * The socket is released from the protocol stack if it has a release * callback, and the inode is then released if the socket is bound to - * an inode not a file. + * an inode not a file. */ - + void sock_release(struct socket *sock) { if (sock->ops) { @@ -576,10 +539,10 @@ void sock_release(struct socket *sock) iput(SOCK_INODE(sock)); return; } - sock->file=NULL; + sock->file = NULL; } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); @@ -622,14 +585,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, * the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec, + msg->msg_iov = (struct iovec *)vec; msg->msg_iovlen = num; result = sock_sendmsg(sock, msg, size); set_fs(oldfs); return result; } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { int err; @@ -648,14 +611,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, return sock->ops->recvmsg(iocb, sock, msg, size, flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; struct sock_iocb siocb; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) @@ -663,9 +626,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } -int kernel_recvmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, - size_t size, int flags) +int kernel_recvmsg(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) { mm_segment_t oldfs = get_fs(); int result; @@ -675,8 +637,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, * the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec, - msg->msg_iovlen = num; + msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; result = sock_recvmsg(sock, msg, size, flags); set_fs(oldfs); return result; @@ -703,7 +664,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, struct sock_iocb *siocb) + char __user *ubuf, size_t size, + struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); @@ -721,20 +683,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, unsigned long nr_segs) + struct file *file, struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0 ; i < nr_segs ; i++) - size += iov[i].iov_len; + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *) iov; + msg->msg_iov = (struct iovec *)iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; @@ -749,7 +712,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, struct msghdr msg; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); @@ -759,7 +722,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -772,24 +735,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, if (!x) return -ENOMEM; return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, unsigned long nr_segs) + struct file *file, struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0 ; i < nr_segs ; i++) - size += iov[i].iov_len; + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *) iov; + msg->msg_iov = (struct iovec *)iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; if (sock->type == SOCK_SEQPACKET) @@ -816,7 +780,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -830,46 +794,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return -ENOMEM; return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } - /* * Atomic setting of ioctl hooks to avoid race * with module unload. */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook)(unsigned int, void __user *)) +void brioctl_set(int (*hook) (unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } + EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook)(void __user *arg); +static int (*vlan_ioctl_hook) (void __user *arg); -void vlan_ioctl_set(int (*hook)(void __user *)) +void vlan_ioctl_set(int (*hook) (void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } + EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook)(unsigned int, void __user *); +static int (*dlci_ioctl_hook) (unsigned int, void __user *); -void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)) +void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) { mutex_lock(&dlci_ioctl_mutex); dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } + EXPORT_SYMBOL(dlci_ioctl_set); /* @@ -891,8 +857,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(cmd, argp); } else -#endif /* CONFIG_WIRELESS_EXT */ - switch (cmd) { +#endif /* CONFIG_WIRELESS_EXT */ + switch (cmd) { case FIOSETOWN: case SIOCSPGRP: err = -EFAULT; @@ -902,7 +868,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case FIOGETOWN: case SIOCGPGRP: - err = put_user(sock->file->f_owner.pid, (int __user *)argp); + err = put_user(sock->file->f_owner.pid, + (int __user *)argp); break; case SIOCGIFBR: case SIOCSIFBR: @@ -913,7 +880,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) request_module("bridge"); mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) + if (br_ioctl_hook) err = br_ioctl_hook(cmd, argp); mutex_unlock(&br_ioctl_mutex); break; @@ -930,7 +897,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case SIOCGIFDIVERT: case SIOCSIFDIVERT: - /* Convert this to call through a hook */ + /* Convert this to call through a hook */ err = divert_ioctl(cmd, argp); break; case SIOCADDDLCI: @@ -955,7 +922,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (err == -ENOIOCTLCMD) err = dev_ioctl(cmd, argp); break; - } + } return err; } @@ -963,7 +930,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; struct socket *sock = NULL; - + err = security_socket_create(family, type, protocol, 1); if (err) goto out; @@ -974,26 +941,33 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) goto out; } - security_socket_post_create(sock, family, type, protocol, 1); sock->type = type; + err = security_socket_post_create(sock, family, type, protocol, 1); + if (err) + goto out_release; + out: *res = sock; return err; +out_release: + sock_release(sock); + sock = NULL; + goto out; } /* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table * wait) +static unsigned int sock_poll(struct file *file, poll_table *wait) { struct socket *sock; /* - * We can't return errors to poll, so it's either yes or no. + * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; return sock->ops->poll(file, sock, wait); } -static int sock_mmap(struct file * file, struct vm_area_struct * vma) +static int sock_mmap(struct file *file, struct vm_area_struct *vma) { struct socket *sock = file->private_data; @@ -1003,12 +977,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) static int sock_close(struct inode *inode, struct file *filp) { /* - * It was possible the inode is NULL we were - * closing an unfinished socket. + * It was possible the inode is NULL we were + * closing an unfinished socket. */ - if (!inode) - { + if (!inode) { printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } @@ -1034,57 +1007,52 @@ static int sock_close(struct inode *inode, struct file *filp) static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna=NULL, **prev; + struct fasync_struct *fa, *fna = NULL, **prev; struct socket *sock; struct sock *sk; - if (on) - { + if (on) { fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if(fna==NULL) + if (fna == NULL) return -ENOMEM; } sock = filp->private_data; - if ((sk=sock->sk) == NULL) { + sk = sock->sk; + if (sk == NULL) { kfree(fna); return -EINVAL; } lock_sock(sk); - prev=&(sock->fasync_list); + prev = &(sock->fasync_list); - for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) - if (fa->fa_file==filp) + for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) + if (fa->fa_file == filp) break; - if(on) - { - if(fa!=NULL) - { + if (on) { + if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd=fd; + fa->fa_fd = fd; write_unlock_bh(&sk->sk_callback_lock); kfree(fna); goto out; } - fna->fa_file=filp; - fna->fa_fd=fd; - fna->magic=FASYNC_MAGIC; - fna->fa_next=sock->fasync_list; + fna->fa_file = filp; + fna->fa_fd = fd; + fna->magic = FASYNC_MAGIC; + fna->fa_next = sock->fasync_list; write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list=fna; + sock->fasync_list = fna; write_unlock_bh(&sk->sk_callback_lock); - } - else - { - if (fa!=NULL) - { + } else { + if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); - *prev=fa->fa_next; + *prev = fa->fa_next; write_unlock_bh(&sk->sk_callback_lock); kfree(fa); } @@ -1101,10 +1069,9 @@ int sock_wake_async(struct socket *sock, int how, int band) { if (!sock || !sock->fasync_list) return -1; - switch (how) - { + switch (how) { case 1: - + if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; @@ -1113,7 +1080,7 @@ int sock_wake_async(struct socket *sock, int how, int band) break; /* fall through */ case 0: - call_kill: +call_kill: __kill_fasync(sock->fasync_list, SIGIO, band); break; case 3: @@ -1122,13 +1089,15 @@ int sock_wake_async(struct socket *sock, int how, int band) return 0; } -static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) +static int __sock_create(int family, int type, int protocol, + struct socket **res, int kern) { int err; struct socket *sock; + const struct net_proto_family *pf; /* - * Check protocol is in range + * Check protocol is in range */ if (family < 0 || family >= NPROTO) return -EAFNOSUPPORT; @@ -1141,10 +1110,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res deadlock in module load. */ if (family == PF_INET && type == SOCK_PACKET) { - static int warned; + static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); } family = PF_PACKET; } @@ -1152,78 +1122,84 @@ static int __sock_create(int family, int type, int protocol, struct socket **res err = security_socket_create(family, type, protocol, kern); if (err) return err; - + + /* + * Allocate the socket and allow the family to set things up. if + * the protocol is 0, the family is instructed to select an appropriate + * default. + */ + sock = sock_alloc(); + if (!sock) { + if (net_ratelimit()) + printk(KERN_WARNING "socket: no more sockets\n"); + return -ENFILE; /* Not exactly a match, but its the + closest posix thing */ + } + + sock->type = type; + #if defined(CONFIG_KMOD) - /* Attempt to load a protocol module if the find failed. - * - * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user + /* Attempt to load a protocol module if the find failed. + * + * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family]==NULL) - { - request_module("net-pf-%d",family); - } + if (net_families[family] == NULL) + request_module("net-pf-%d", family); #endif - net_family_read_lock(); - if (net_families[family] == NULL) { - err = -EAFNOSUPPORT; - goto out; - } - -/* - * Allocate the socket and allow the family to set things up. if - * the protocol is 0, the family is instructed to select an appropriate - * default. - */ - - if (!(sock = sock_alloc())) { - printk(KERN_WARNING "socket: no more sockets\n"); - err = -ENFILE; /* Not exactly a match, but its the - closest posix thing */ - goto out; - } - - sock->type = type; + rcu_read_lock(); + pf = rcu_dereference(net_families[family]); + err = -EAFNOSUPPORT; + if (!pf) + goto out_release; /* * We will call the ->create function, that possibly is in a loadable * module, so we have to bump that loadable module refcnt first. */ - err = -EAFNOSUPPORT; - if (!try_module_get(net_families[family]->owner)) + if (!try_module_get(pf->owner)) goto out_release; - if ((err = net_families[family]->create(sock, protocol)) < 0) { - sock->ops = NULL; + /* Now protected by module ref count */ + rcu_read_unlock(); + + err = pf->create(sock, protocol); + if (err < 0) goto out_module_put; - } /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. */ - if (!try_module_get(sock->ops->owner)) { - sock->ops = NULL; - goto out_module_put; - } + if (!try_module_get(sock->ops->owner)) + goto out_module_busy; + /* * Now that we're done with the ->create function, the [loadable] * module can have its refcnt decremented */ - module_put(net_families[family]->owner); + module_put(pf->owner); + err = security_socket_post_create(sock, family, type, protocol, kern); + if (err) + goto out_release; *res = sock; - security_socket_post_create(sock, family, type, protocol, kern); -out: - net_family_read_unlock(); - return err; + return 0; + +out_module_busy: + err = -EAFNOSUPPORT; out_module_put: - module_put(net_families[family]->owner); -out_release: + sock->ops = NULL; + module_put(pf->owner); +out_sock_release: sock_release(sock); - goto out; + return err; + +out_release: + rcu_read_unlock(); + goto out_sock_release; } int sock_create(int family, int type, int protocol, struct socket **res) @@ -1262,7 +1238,8 @@ out_release: * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec) +asmlinkage long sys_socketpair(int family, int type, int protocol, + int __user *usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1281,7 +1258,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u goto out_release_1; err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) + if (err < 0) goto out_release_both; fd1 = fd2 = -1; @@ -1300,7 +1277,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u * Not kernel problem. */ - err = put_user(fd1, &usockvec[0]); + err = put_user(fd1, &usockvec[0]); if (!err) err = put_user(fd2, &usockvec[1]); if (!err) @@ -1311,19 +1288,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u return err; out_close_1: - sock_release(sock2); + sock_release(sock2); sys_close(fd1); return err; out_release_both: - sock_release(sock2); + sock_release(sock2); out_release_1: - sock_release(sock1); + sock_release(sock1); out: return err; } - /* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. @@ -1338,35 +1314,39 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) char address[MAX_SOCK_ADDR]; int err, fput_needed; - if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) - { - if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { - err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if(sock) { + err = move_addr_to_kernel(umyaddr, addrlen, address); + if (err >= 0) { + err = security_socket_bind(sock, + (struct sockaddr *)address, + addrlen); if (!err) err = sock->ops->bind(sock, - (struct sockaddr *)address, addrlen); + (struct sockaddr *) + address, addrlen); } fput_light(sock->file, fput_needed); - } + } return err; } - /* * Perform a listen. Basically, we allow the protocol to do anything * necessary for a listen, and if that works, we mark the socket as * ready for listening. */ -int sysctl_somaxconn = SOMAXCONN; +int sysctl_somaxconn __read_mostly = SOMAXCONN; asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; int err, fput_needed; - - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { - if ((unsigned) backlog > sysctl_somaxconn) + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { + if ((unsigned)backlog > sysctl_somaxconn) backlog = sysctl_somaxconn; err = security_socket_listen(sock, backlog); @@ -1378,7 +1358,6 @@ asmlinkage long sys_listen(int fd, int backlog) return err; } - /* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new @@ -1391,7 +1370,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) { struct socket *sock, *newsock; struct file *newfile; @@ -1403,7 +1383,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + if (!(newsock = sock_alloc())) goto out_put; newsock->type = sock->type; @@ -1435,11 +1415,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ goto out_fd; if (upeer_sockaddr) { - if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { + if (newsock->ops->getname(newsock, (struct sockaddr *)address, + &len, 2) < 0) { err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); + err = move_addr_to_user(address, len, upeer_sockaddr, + upeer_addrlen); if (err < 0) goto out_fd; } @@ -1461,7 +1443,6 @@ out_fd: goto out_put; } - /* * Attempt to connect to a socket with the server address. The address * is in user space so we verify it is OK and move it to kernel space. @@ -1474,7 +1455,8 @@ out_fd: * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) +asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1487,11 +1469,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl if (err < 0) goto out_put; - err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); + err = + security_socket_connect(sock, (struct sockaddr *)address, addrlen); if (err) goto out_put; - err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, + err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, sock->file->f_flags); out_put: fput_light(sock->file, fput_needed); @@ -1504,12 +1487,13 @@ out: * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) +asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1534,22 +1518,27 @@ out: * name to user space. */ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) +asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_getpeername(sock); if (err) { fput_light(sock->file, fput_needed); return err; } - err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); + err = + sock->ops->getname(sock, (struct sockaddr *)address, &len, + 1); if (!err) - err=move_addr_to_user(address,len, usockaddr, usockaddr_len); + err = move_addr_to_user(address, len, usockaddr, + usockaddr_len); fput_light(sock->file, fput_needed); } return err; @@ -1561,8 +1550,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _ * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags, - struct sockaddr __user *addr, int addr_len) +asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, + unsigned flags, struct sockaddr __user *addr, + int addr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1579,54 +1569,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag sock = sock_from_file(sock_file, &err); if (!sock) goto out_put; - iov.iov_base=buff; - iov.iov_len=len; - msg.msg_name=NULL; - msg.msg_iov=&iov; - msg.msg_iovlen=1; - msg.msg_control=NULL; - msg.msg_controllen=0; - msg.msg_namelen=0; + iov.iov_base = buff; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; if (addr) { err = move_addr_to_kernel(addr, addr_len, address); if (err < 0) goto out_put; - msg.msg_name=address; - msg.msg_namelen=addr_len; + msg.msg_name = address; + msg.msg_namelen = addr_len; } if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; err = sock_sendmsg(sock, &msg, len); -out_put: +out_put: fput_light(sock_file, fput_needed); return err; } /* - * Send a datagram down a socket. + * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags) +asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } /* - * Receive a frame from the socket and optionally record the address of the + * Receive a frame from the socket and optionally record the address of the * sender. We verify the buffers are writable and if needed move the * sender address from kernel to user space. */ -asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags, - struct sockaddr __user *addr, int __user *addr_len) +asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned flags, struct sockaddr __user *addr, + int __user *addr_len) { struct socket *sock; struct iovec iov; struct msghdr msg; char address[MAX_SOCK_ADDR]; - int err,err2; + int err, err2; struct file *sock_file; int fput_needed; @@ -1638,23 +1629,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f if (!sock) goto out; - msg.msg_control=NULL; - msg.msg_controllen=0; - msg.msg_iovlen=1; - msg.msg_iov=&iov; - iov.iov_len=size; - iov.iov_base=ubuf; - msg.msg_name=address; - msg.msg_namelen=MAX_SOCK_ADDR; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = &iov; + iov.iov_len = size; + iov.iov_base = ubuf; + msg.msg_name = address; + msg.msg_namelen = MAX_SOCK_ADDR; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err=sock_recvmsg(sock, &msg, size, flags); + err = sock_recvmsg(sock, &msg, size, flags); - if(err >= 0 && addr != NULL) - { - err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); - if(err2<0) - err=err2; + if (err >= 0 && addr != NULL) { + err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + if (err2 < 0) + err = err2; } out: fput_light(sock_file, fput_needed); @@ -1662,10 +1652,11 @@ out: } /* - * Receive a datagram from a socket. + * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags) +asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, + unsigned flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1675,24 +1666,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags * to pass the user mode parameter for the protocols to sort out. */ -asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) +asmlinkage long sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) { int err, fput_needed; struct socket *sock; if (optlen < 0) return -EINVAL; - - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) - { - err = security_socket_setsockopt(sock,level,optname); + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_setsockopt(sock, level, optname); if (err) goto out_put; if (level == SOL_SOCKET) - err=sock_setsockopt(sock,level,optname,optval,optlen); + err = + sock_setsockopt(sock, level, optname, optval, + optlen); else - err=sock->ops->setsockopt(sock, level, optname, optval, optlen); + err = + sock->ops->setsockopt(sock, level, optname, optval, + optlen); out_put: fput_light(sock->file, fput_needed); } @@ -1704,27 +1700,32 @@ out_put: * to pass a user mode parameter for the protocols to sort out. */ -asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) +asmlinkage long sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen) { int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_getsockopt(sock, level, optname); if (err) goto out_put; if (level == SOL_SOCKET) - err=sock_getsockopt(sock,level,optname,optval,optlen); + err = + sock_getsockopt(sock, level, optname, optval, + optlen); else - err=sock->ops->getsockopt(sock, level, optname, optval, optlen); + err = + sock->ops->getsockopt(sock, level, optname, optval, + optlen); out_put: fput_light(sock->file, fput_needed); } return err; } - /* * Shutdown a socket. */ @@ -1734,8 +1735,8 @@ asmlinkage long sys_shutdown(int fd, int how) int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) - { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_shutdown(sock, how); if (!err) err = sock->ops->shutdown(sock, how); @@ -1744,41 +1745,42 @@ asmlinkage long sys_shutdown(int fd, int how) return err; } -/* A couple of helpful macros for getting the address of the 32/64 bit +/* A couple of helpful macros for getting the address of the 32/64 bit * fields which are the same type (int / unsigned) on our platforms. */ #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) - /* * BSD sendmsg interface */ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) { - struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; struct socket *sock; char address[MAX_SOCK_ADDR]; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] - __attribute__ ((aligned (sizeof(__kernel_size_t)))); - /* 20 is size of ipv6_pktinfo */ + __attribute__ ((aligned(sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; int fput_needed; - + err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) + if (!sock) goto out; /* do not move before msg_sys is valid */ @@ -1786,7 +1788,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - /* Check whether to allocate the iovec area*/ + /* Check whether to allocate the iovec area */ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1800,7 +1802,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); } else err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); - if (err < 0) + if (err < 0) goto out_freeiov; total_len = err; @@ -1808,18 +1810,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) if (msg_sys.msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys.msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); + err = + cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + sizeof(ctl)); if (err) goto out_freeiov; ctl_buf = msg_sys.msg_control; ctl_len = msg_sys.msg_controllen; } else if (ctl_len) { - if (ctl_len > sizeof(ctl)) - { + if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); - if (ctl_buf == NULL) + if (ctl_buf == NULL) goto out_freeiov; } err = -EFAULT; @@ -1828,7 +1831,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ - if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len)) + if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, + ctl_len)) goto out_freectl; msg_sys.msg_control = ctl_buf; } @@ -1839,14 +1843,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = sock_sendmsg(sock, &msg_sys, total_len); out_freectl: - if (ctl_buf != ctl) + if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); out_put: fput_light(sock->file, fput_needed); -out: +out: return err; } @@ -1854,12 +1858,14 @@ out: * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags) +asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, + unsigned int flags) { - struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack; + struct iovec *iov = iovstack; struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; @@ -1871,13 +1877,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag /* user mode address pointers */ struct sockaddr __user *uaddr; int __user *uaddr_len; - + if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } else - if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) - return -EFAULT; + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1886,8 +1892,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag err = -EMSGSIZE; if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - - /* Check whether to allocate the iovec area*/ + + /* Check whether to allocate the iovec area */ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1897,11 +1903,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag } /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + * Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - - uaddr = (void __user *) msg_sys.msg_name; + + uaddr = (void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); @@ -1909,13 +1915,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; - total_len=err; + total_len = err; cmsg_ptr = (unsigned long)msg_sys.msg_control; msg_sys.msg_flags = 0; if (MSG_CMSG_COMPAT & flags) msg_sys.msg_flags = MSG_CMSG_COMPAT; - + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg_sys, total_len, flags); @@ -1924,7 +1930,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag len = err; if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); + err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, + uaddr_len); if (err < 0) goto out_freeiov; } @@ -1933,10 +1940,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -1955,163 +1962,187 @@ out: /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; +static const unsigned char nargs[18]={ + AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) +}; + #undef AL /* - * System call vectors. + * System call vectors. * * Argument checking cleaned up. Saved 20% in size. * This function doesn't need to set the kernel lock because - * it is set by the callees. + * it is set by the callees. */ asmlinkage long sys_socketcall(int call, unsigned long __user *args) { unsigned long a[6]; - unsigned long a0,a1; + unsigned long a0, a1; int err; - if(call<1||call>SYS_RECVMSG) + if (call < 1 || call > SYS_RECVMSG) return -EINVAL; /* copy_from_user should be SMP safe. */ if (copy_from_user(a, args, nargs[call])) return -EFAULT; - err = audit_socketcall(nargs[call]/sizeof(unsigned long), a); + err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); if (err) return err; - a0=a[0]; - a1=a[1]; - - switch(call) - { - case SYS_SOCKET: - err = sys_socket(a0,a1,a[2]); - break; - case SYS_BIND: - err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]); - break; - case SYS_CONNECT: - err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_LISTEN: - err = sys_listen(a0,a1); - break; - case SYS_ACCEPT: - err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_GETSOCKNAME: - err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_GETPEERNAME: - err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_SOCKETPAIR: - err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]); - break; - case SYS_SEND: - err = sys_send(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_SENDTO: - err = sys_sendto(a0,(void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], a[5]); - break; - case SYS_RECV: - err = sys_recv(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_RECVFROM: - err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], (int __user *)a[5]); - break; - case SYS_SHUTDOWN: - err = sys_shutdown(a0,a1); - break; - case SYS_SETSOCKOPT: - err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); - break; - case SYS_GETSOCKOPT: - err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]); - break; - case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]); - break; - case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]); - break; - default: - err = -EINVAL; - break; + a0 = a[0]; + a1 = a[1]; + + switch (call) { + case SYS_SOCKET: + err = sys_socket(a0, a1, a[2]); + break; + case SYS_BIND: + err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); + break; + case SYS_CONNECT: + err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); + break; + case SYS_LISTEN: + err = sys_listen(a0, a1); + break; + case SYS_ACCEPT: + err = + sys_accept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_GETSOCKNAME: + err = + sys_getsockname(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_GETPEERNAME: + err = + sys_getpeername(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_SOCKETPAIR: + err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); + break; + case SYS_SEND: + err = sys_send(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_SENDTO: + err = sys_sendto(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], a[5]); + break; + case SYS_RECV: + err = sys_recv(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_RECVFROM: + err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], + (int __user *)a[5]); + break; + case SYS_SHUTDOWN: + err = sys_shutdown(a0, a1); + break; + case SYS_SETSOCKOPT: + err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); + break; + case SYS_GETSOCKOPT: + err = + sys_getsockopt(a0, a1, a[2], (char __user *)a[3], + (int __user *)a[4]); + break; + case SYS_SENDMSG: + err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); + break; + case SYS_RECVMSG: + err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); + break; + default: + err = -EINVAL; + break; } return err; } -#endif /* __ARCH_WANT_SYS_SOCKETCALL */ +#endif /* __ARCH_WANT_SYS_SOCKETCALL */ -/* +/** + * sock_register - add a socket protocol handler + * @ops: description of protocol + * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * SOCKET module. + * socket interface. The value ops->family coresponds to the + * socket system call protocol family. */ - -int sock_register(struct net_proto_family *ops) +int sock_register(const struct net_proto_family *ops) { int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); + printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, + NPROTO); return -ENOBUFS; } - net_family_write_lock(); - err = -EEXIST; - if (net_families[ops->family] == NULL) { - net_families[ops->family]=ops; + + spin_lock(&net_family_lock); + if (net_families[ops->family]) + err = -EEXIST; + else { + net_families[ops->family] = ops; err = 0; } - net_family_write_unlock(); - printk(KERN_INFO "NET: Registered protocol family %d\n", - ops->family); + spin_unlock(&net_family_lock); + + printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } -/* +/** + * sock_unregister - remove a protocol handler + * @family: protocol family to remove + * * This function is called by a protocol handler that wants to * remove its address family, and have it unlinked from the - * SOCKET module. + * new socket creation. + * + * If protocol handler is a module, then it can use module reference + * counts to protect against new references. If protocol handler is not + * a module then it needs to provide its own protection in + * the ops->create routine. */ - -int sock_unregister(int family) +void sock_unregister(int family) { - if (family < 0 || family >= NPROTO) - return -1; + BUG_ON(family < 0 || family >= NPROTO); - net_family_write_lock(); - net_families[family]=NULL; - net_family_write_unlock(); - printk(KERN_INFO "NET: Unregistered protocol family %d\n", - family); - return 0; + spin_lock(&net_family_lock); + net_families[family] = NULL; + spin_unlock(&net_family_lock); + + synchronize_rcu(); + + printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } static int __init sock_init(void) { /* - * Initialize sock SLAB cache. + * Initialize sock SLAB cache. */ - + sk_init(); /* - * Initialize skbuff SLAB cache + * Initialize skbuff SLAB cache */ skb_init(); /* - * Initialize the protocols module. + * Initialize the protocols module. */ init_inodecache(); @@ -2137,7 +2168,7 @@ void socket_seq_show(struct seq_file *seq) int counter = 0; for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); + counter += per_cpu(sockets_in_use, cpu); /* It can be negative, by the way. 8) */ if (counter < 0) @@ -2145,11 +2176,11 @@ void socket_seq_show(struct seq_file *seq) seq_printf(seq, "sockets: used %d\n", counter); } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, unsigned cmd, - unsigned long arg) + unsigned long arg) { struct socket *sock = file->private_data; int ret = -ENOIOCTLCMD; @@ -2161,6 +2192,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, } #endif +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +{ + return sock->ops->bind(sock, addr, addrlen); +} + +int kernel_listen(struct socket *sock, int backlog) +{ + return sock->ops->listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock->sk; + int err; + + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto done; + + err = sock->ops->accept(sock, *newsock, flags); + if (err < 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)->ops = sock->ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock->ops->connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) +{ + if (sock->ops->sendpage) + return sock->ops->sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, cmd, arg); + set_fs(oldfs); + + return err; +} + /* ABI emulation layers need these two */ EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); @@ -2177,3 +2311,13 @@ EXPORT_SYMBOL(sock_wake_async); EXPORT_SYMBOL(sockfd_lookup); EXPORT_SYMBOL(kernel_sendmsg); EXPORT_SYMBOL(kernel_recvmsg); +EXPORT_SYMBOL(kernel_bind); +EXPORT_SYMBOL(kernel_listen); +EXPORT_SYMBOL(kernel_accept); +EXPORT_SYMBOL(kernel_connect); +EXPORT_SYMBOL(kernel_getsockname); +EXPORT_SYMBOL(kernel_getpeername); +EXPORT_SYMBOL(kernel_getsockopt); +EXPORT_SYMBOL(kernel_setsockopt); +EXPORT_SYMBOL(kernel_sendpage); +EXPORT_SYMBOL(kernel_sock_ioctl); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 55163af3dcaf..993ff1a5d945 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -331,8 +331,8 @@ rpcauth_unbindcred(struct rpc_task *task) task->tk_msg.rpc_cred = NULL; } -u32 * -rpcauth_marshcred(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_marshcred(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -342,8 +342,8 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p) return cred->cr_ops->crmarshal(task, p); } -u32 * -rpcauth_checkverf(struct rpc_task *task, u32 *p) +__be32 * +rpcauth_checkverf(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -355,7 +355,7 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p) int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; @@ -369,7 +369,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, - u32 *data, void *obj) + __be32 *data, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 519ebc17c028..a6ed2d22a6e6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -88,7 +88,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - char path[48]; spinlock_t lock; }; @@ -225,9 +224,8 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - memset(ctx, 0, sizeof(*ctx)); ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ spin_lock_init(&ctx->gc_seq_lock); @@ -391,9 +389,8 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) { struct gss_upcall_msg *gss_msg; - gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); if (gss_msg != NULL) { - memset(gss_msg, 0, sizeof(*gss_msg)); INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); @@ -692,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", - clnt->cl_pathname, - gss_auth->mech->gm_name); - gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; @@ -720,8 +715,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->path); - dput(gss_auth->dentry); + rpc_unlink(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); @@ -776,10 +770,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) goto out_err; - memset(cred, 0, sizeof(*cred)); atomic_set(&cred->gc_count, 1); cred->gc_uid = acred->uid; /* @@ -833,14 +826,14 @@ out: * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -gss_marshal(struct rpc_task *task, u32 *p) +static __be32 * +gss_marshal(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *cred_len; + __be32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; @@ -901,12 +894,12 @@ gss_refresh(struct rpc_task *task) return 0; } -static u32 * -gss_validate(struct rpc_task *task, u32 *p) +static __be32 * +gss_validate(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq; + __be32 seq; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -947,13 +940,14 @@ out_bad: static inline int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; - u32 *integ_len = NULL; + __be32 *integ_len = NULL; struct xdr_netobj mic; - u32 offset, *q; + u32 offset; + __be32 *q; struct kvec *iov; u32 maj_stat = 0; int status = -EIO; @@ -1039,13 +1033,13 @@ out: static inline int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) + kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 offset; u32 maj_stat; int status; - u32 *opaque_len; + __be32 *opaque_len; struct page **inpages; int first; int pad; @@ -1102,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_wrap_req(struct rpc_task *task, - kxdrproc_t encode, void *rqstp, u32 *p, void *obj) + kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -1139,7 +1133,7 @@ out: static inline int gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct xdr_buf integ_buf; @@ -1176,7 +1170,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static inline int gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, - struct rpc_rqst *rqstp, u32 **p) + struct rpc_rqst *rqstp, __be32 **p) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; u32 offset; @@ -1205,13 +1199,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, static int gss_unwrap_resp(struct rpc_task *task, - kxdrproc_t decode, void *rqstp, u32 *p, void *obj) + kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 *savedp = p; + __be32 *savedp = p; struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; int savedlen = head->iov_len; int status = -EIO; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 76b969e6904f..e11a40b25cce 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,6 +34,7 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#include <linux/err.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/slab.h> @@ -49,7 +50,7 @@ u32 krb5_encrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -58,26 +59,27 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_encrypt: output data:\n"); print_hexl((u32 *)out, length, 0); @@ -90,7 +92,7 @@ EXPORT_SYMBOL(krb5_encrypt); u32 krb5_decrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -99,25 +101,26 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_decrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_decrypt: output_data:\n"); print_hexl((u32 *)out, length, 0); @@ -197,11 +200,9 @@ out: static int checksummer(struct scatterlist *sg, void *data) { - struct crypto_tfm *tfm = (struct crypto_tfm *)data; + struct hash_desc *desc = data; - crypto_digest_update(tfm, sg, 1); - - return 0; + return crypto_hash_update(desc, sg, sg->length); } /* checksum the plaintext data and hdrlen bytes of the token header */ @@ -210,8 +211,9 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum) { char *cksumname; - struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ + struct hash_desc desc; /* XXX add to ctx? */ struct scatterlist sg[1]; + int err; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -222,25 +224,35 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, " unsupported checksum %d", cksumtype); return GSS_S_FAILURE; } - if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP))) + desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; - cksum->len = crypto_tfm_alg_digestsize(tfm); + cksum->len = crypto_hash_digestsize(desc.tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - crypto_digest_init(tfm); + err = crypto_hash_init(&desc); + if (err) + goto out; sg_set_buf(sg, header, hdrlen); - crypto_digest_update(tfm, sg, 1); - process_xdr_buf(body, body_offset, body->len - body_offset, - checksummer, tfm); - crypto_digest_final(tfm, cksum->data); - crypto_free_tfm(tfm); - return 0; + err = crypto_hash_update(&desc, sg, hdrlen); + if (err) + goto out; + err = process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, &desc); + if (err) + goto out; + err = crypto_hash_final(&desc, cksum->data); + +out: + crypto_free_hash(desc.tfm); + return err ? GSS_S_FAILURE : 0; } EXPORT_SYMBOL(make_checksum); struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; int pos; struct xdr_buf *outbuf; struct page **pages; @@ -285,8 +297,8 @@ encryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, - thislen, desc->iv); + ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, + desc->infrags, thislen); if (ret) return ret; if (fraglen) { @@ -305,16 +317,18 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, - struct page **pages) +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset, struct page **pages) { int ret; struct encryptor_desc desc; - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.pos = offset; desc.outbuf = buf; desc.pages = pages; @@ -329,7 +343,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf); struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; struct scatterlist frags[4]; int fragno; int fraglen; @@ -355,8 +369,8 @@ decryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, - thislen, desc->iv); + ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, + desc->frags, thislen); if (ret) return ret; if (fraglen) { @@ -373,15 +387,18 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset) { struct decryptor_desc desc; /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 129e2bd36aff..325e72e4fd31 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -34,6 +34,7 @@ * */ +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res) +get_key(const void *p, const void *end, struct crypto_blkcipher **res) { struct xdr_netobj key; - int alg, alg_mode; + int alg; char *alg_name; p = simple_get_bytes(p, end, &alg, sizeof(alg)); @@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) switch (alg) { case ENCTYPE_DES_CBC_RAW: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; break; default: printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; } @@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -129,9 +131,8 @@ gss_import_sec_context_kerberos(const void *p, const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; - if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; - memset(ctx, 0, sizeof(*ctx)); p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) @@ -169,13 +170,13 @@ gss_import_sec_context_kerberos(const void *p, } ctx_id->internal_ctx_id = ctx; - dprintk("RPC: Succesfully imported new context.\n"); + dprintk("RPC: Successfully imported new context.\n"); return 0; out_err_free_key2: - crypto_free_tfm(ctx->seq); + crypto_free_blkcipher(ctx->seq); out_err_free_key1: - crypto_free_tfm(ctx->enc); + crypto_free_blkcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx: @@ -188,8 +189,8 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_tfm(kctx->seq); - crypto_free_tfm(kctx->enc); + crypto_free_blkcipher(kctx->seq); + crypto_free_blkcipher(kctx->enc); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index f43311221a72..08601ee4cd73 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,7 +70,7 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(krb5_seq_lock); u32 gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, @@ -115,7 +115,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c53ead39118d..c604baf3a5f6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -41,7 +41,7 @@ #endif s32 -krb5_make_seq_num(struct crypto_tfm *key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key, } s32 -krb5_get_seq_num(struct crypto_tfm *key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 89d1f3e14128..cc45c1605f80 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, goto out_err; } - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; @@ -177,9 +177,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, msg_start = krb5_hdr + 24; /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); - *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); make_confounder(msg_start, blocksize); @@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); data_start = ptr + 22 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index f8bac6ccd524..3db745379d06 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -224,7 +224,8 @@ EXPORT_SYMBOL(gss_service_to_auth_domain_name); void gss_mech_put(struct gss_api_mech * gm) { - module_put(gm->gm_owner); + if (gm) + module_put(gm->gm_owner); } EXPORT_SYMBOL(gss_mech_put); @@ -236,9 +237,8 @@ gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id) { - if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL))) + if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL))) return GSS_S_FAILURE; - memset(*ctx_id, 0, sizeof(**ctx_id)); (*ctx_id)->mech_type = gss_mech_get(mech); return mech->gm_ops @@ -307,8 +307,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle) (*context_handle)->mech_type->gm_ops ->gss_delete_sec_context((*context_handle) ->internal_ctx_id); - if ((*context_handle)->mech_type) - gss_mech_put((*context_handle)->mech_type); + gss_mech_put((*context_handle)->mech_type); kfree(*context_handle); *context_handle=NULL; return GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5bf11ccba7cd..bdedf456bc17 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -34,6 +34,7 @@ * */ +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) +get_key(const void *p, const void *end, struct crypto_blkcipher **res, + int *resalg) { struct xdr_netobj key = { 0 }; - int alg_mode,setkey = 0; + int setkey = 0; char *alg_name; p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); @@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) switch (*resalg) { case NID_des_cbc: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; setkey = 1; break; case NID_cast5_cbc: /* XXXX here in name only, not used */ - alg_name = "cast5"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(cast5)"; setkey = 0; /* XXX will need to set to 1 */ break; case NID_md5: @@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); } alg_name = "md5"; - alg_mode = 0; setkey = 0; break; default: dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); goto out_err_free_tfm; } @@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: if(key.len > 0) kfree(key.data); @@ -152,9 +153,8 @@ gss_import_sec_context_spkm3(const void *p, size_t len, const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; - if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) + if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; - memset(ctx, 0, sizeof(*ctx)); p = simple_get_netobj(p, end, &ctx->ctx_id); if (IS_ERR(p)) @@ -201,13 +201,13 @@ gss_import_sec_context_spkm3(const void *p, size_t len, ctx_id->internal_ctx_id = ctx; - dprintk("Succesfully imported new spkm context.\n"); + dprintk("Successfully imported new spkm context.\n"); return 0; out_err_free_key2: - crypto_free_tfm(ctx->derived_integ_key); + crypto_free_blkcipher(ctx->derived_integ_key); out_err_free_key1: - crypto_free_tfm(ctx->derived_conf_key); + crypto_free_blkcipher(ctx->derived_conf_key); out_err_free_s_key: kfree(ctx->share_key.data); out_err_free_mech: @@ -224,8 +224,8 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - crypto_free_tfm(sctx->derived_integ_key); - crypto_free_tfm(sctx->derived_conf_key); + crypto_free_blkcipher(sctx->derived_integ_key); + crypto_free_blkcipher(sctx->derived_conf_key); kfree(sctx->share_key.data); kfree(sctx->mech_used.data); kfree(sctx); diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index af0d7ce74686..854a983ccf26 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -90,10 +90,9 @@ asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) { - if (!(out->data = kmalloc(explen,GFP_KERNEL))) + if (!(out->data = kzalloc(explen,GFP_KERNEL))) return 0; out->len = explen; - memset(out->data, 0, explen); memcpy(out->data, in, enclen); return 1; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d51e316c5821..638c0b576203 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -425,6 +425,7 @@ static int rsc_parse(struct cache_detail *cd, struct rsc rsci, *rscp = NULL; time_t expiry; int status = -EINVAL; + struct gss_api_mech *gm = NULL; memset(&rsci, 0, sizeof(rsci)); /* context handle */ @@ -453,7 +454,6 @@ static int rsc_parse(struct cache_detail *cd, set_bit(CACHE_NEGATIVE, &rsci.h.flags); else { int N, i; - struct gss_api_mech *gm; /* gid */ if (get_int(&mesg, &rsci.cred.cr_gid)) @@ -488,21 +488,17 @@ static int rsc_parse(struct cache_detail *cd, status = -EINVAL; /* mech-specific data: */ len = qword_get(&mesg, buf, mlen); - if (len < 0) { - gss_mech_put(gm); + if (len < 0) goto out; - } status = gss_import_sec_context(buf, len, gm, &rsci.mechctx); - if (status) { - gss_mech_put(gm); + if (status) goto out; - } - gss_mech_put(gm); } rsci.h.expiry_time = expiry; rscp = rsc_update(&rsci, rscp); status = 0; out: + gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, &rsc_cache); @@ -611,7 +607,7 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) if (argv->iov_len < 4) return -1; - o->len = ntohl(svc_getu32(argv)); + o->len = svc_getnl(argv); l = round_up_to_quad(o->len); if (argv->iov_len < l) return -1; @@ -624,17 +620,17 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) static inline int svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) { - u32 *p; + u8 *p; if (resv->iov_len + 4 > PAGE_SIZE) return -1; - svc_putu32(resv, htonl(o->len)); + svc_putnl(resv, o->len); p = resv->iov_base + resv->iov_len; resv->iov_len += round_up_to_quad(o->len); if (resv->iov_len > PAGE_SIZE) return -1; memcpy(p, o->data, o->len); - memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len); + memset(p + o->len, 0, round_up_to_quad(o->len) - o->len); return 0; } @@ -644,7 +640,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) */ static int gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, - u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) + __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp) { struct gss_ctx *ctx_id = rsci->mechctx; struct xdr_buf rpchdr; @@ -661,7 +657,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, *authp = rpc_autherr_badverf; if (argv->iov_len < 4) return SVC_DENIED; - flavor = ntohl(svc_getu32(argv)); + flavor = svc_getnl(argv); if (flavor != RPC_AUTH_GSS) return SVC_DENIED; if (svc_safe_getnetobj(argv, &checksum)) @@ -691,9 +687,9 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, static int gss_write_null_verf(struct svc_rqst *rqstp) { - u32 *p; + __be32 *p; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; /* don't really need to check if head->iov_len > PAGE_SIZE ... */ *p++ = 0; @@ -705,14 +701,14 @@ gss_write_null_verf(struct svc_rqst *rqstp) static int gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { - u32 xdr_seq; + __be32 xdr_seq; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj mic; - u32 *p; + __be32 *p; struct kvec iov; - svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); + svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); xdr_seq = htonl(seq); iov.iov_base = &xdr_seq; @@ -786,7 +782,7 @@ EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); static inline int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -809,7 +805,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) struct xdr_netobj mic; struct xdr_buf integ_buf; - integ_len = ntohl(svc_getu32(&buf->head[0])); + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) goto out; if (integ_len > buf->len) @@ -829,19 +825,87 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) goto out; - if (ntohl(svc_getu32(&buf->head[0])) != seq) + if (svc_getnl(&buf->head[0]) != seq) goto out; stat = 0; out: return stat; } +static inline int +total_buf_len(struct xdr_buf *buf) +{ + return buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len; +} + +static void +fix_priv_head(struct xdr_buf *buf, int pad) +{ + if (buf->page_len == 0) { + /* We need to adjust head and buf->len in tandem in this + * case to make svc_defer() work--it finds the original + * buffer start using buf->len - buf->head[0].iov_len. */ + buf->head[0].iov_len -= pad; + } +} + +static int +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +{ + u32 priv_len, maj_stat; + int pad, saved_len, remaining_len, offset; + + rqstp->rq_sendfile_ok = 0; + + priv_len = svc_getnl(&buf->head[0]); + if (rqstp->rq_deferred) { + /* Already decrypted last time through! The sequence number + * check at out_seq is unnecessary but harmless: */ + goto out_seq; + } + /* buf->len is the number of bytes from the original start of the + * request to the end, where head[0].iov_len is just the bytes + * not yet read from the head, so these two values are different: */ + remaining_len = total_buf_len(buf); + if (priv_len > remaining_len) + return -EINVAL; + pad = remaining_len - priv_len; + buf->len -= pad; + fix_priv_head(buf, pad); + + /* Maybe it would be better to give gss_unwrap a length parameter: */ + saved_len = buf->len; + buf->len = priv_len; + maj_stat = gss_unwrap(ctx, 0, buf); + pad = priv_len - buf->len; + buf->len = saved_len; + buf->len -= pad; + /* The upper layers assume the buffer is aligned on 4-byte boundaries. + * In the krb5p case, at least, the data ends up offset, so we need to + * move it around. */ + /* XXX: This is very inefficient. It would be better to either do + * this while we encrypt, or maybe in the receive code, if we can peak + * ahead and work out the service and mechanism there. */ + offset = buf->head[0].iov_len % 4; + if (offset) { + buf->buflen = RPCSVC_MAXPAYLOAD; + xdr_shift_buf(buf, offset); + fix_priv_head(buf, pad); + } + if (maj_stat != GSS_S_COMPLETE) + return -EINVAL; +out_seq: + if (svc_getnl(&buf->head[0]) != seq) + return -EINVAL; + return 0; +} + struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; /* pointer to the beginning of the procedure-specific results, * which may be encrypted/checksummed in svcauth_gss_release: */ - u32 *body_start; + __be32 *body_start; struct rsc *rsci; }; @@ -882,7 +946,7 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) * response here and return SVC_COMPLETE. */ static int -svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -892,8 +956,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; struct rsi *rsip, rsikey; - u32 *rpcstart; - u32 *reject_stat = resv->iov_base + resv->iov_len; + __be32 *rpcstart; + __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len); @@ -921,12 +985,12 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) if (argv->iov_len < 5 * 4) goto auth_err; - crlen = ntohl(svc_getu32(argv)); - if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) + crlen = svc_getnl(argv); + if (svc_getnl(argv) != RPC_GSS_VERSION) goto auth_err; - gc->gc_proc = ntohl(svc_getu32(argv)); - gc->gc_seq = ntohl(svc_getu32(argv)); - gc->gc_svc = ntohl(svc_getu32(argv)); + gc->gc_proc = svc_getnl(argv); + gc->gc_seq = svc_getnl(argv); + gc->gc_svc = svc_getnl(argv); if (svc_safe_getnetobj(argv, &gc->gc_ctx)) goto auth_err; if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) @@ -952,9 +1016,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) case RPC_GSS_PROC_CONTINUE_INIT: if (argv->iov_len < 2 * 4) goto auth_err; - if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) + if (svc_getnl(argv) != RPC_AUTH_NULL) goto auth_err; - if (ntohl(svc_getu32(argv)) != 0) + if (svc_getnl(argv) != 0) goto auth_err; break; case RPC_GSS_PROC_DATA: @@ -1012,14 +1076,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto drop; if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); if (svc_safe_putnetobj(resv, &rsip->out_handle)) goto drop; if (resv->iov_len + 3 * 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, htonl(rsip->major_status)); - svc_putu32(resv, htonl(rsip->minor_status)); - svc_putu32(resv, htonl(GSS_SEQ_WIN)); + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); if (svc_safe_putnetobj(resv, &rsip->out_token)) goto drop; rqstp->rq_client = NULL; @@ -1029,7 +1093,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; - svc_putu32(resv, rpc_success); + svc_putnl(resv, RPC_SUCCESS); goto complete; case RPC_GSS_PROC_DATA: *authp = rpcsec_gsserr_ctxproblem; @@ -1047,11 +1111,18 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) goto auth_err; /* placeholders for length and seq. number: */ svcdata->body_start = resv->iov_base + resv->iov_len; - svc_putu32(resv, 0); - svc_putu32(resv, 0); + svc_putnl(resv, 0); + svc_putnl(resv, 0); break; case RPC_GSS_SVC_PRIVACY: - /* currently unsupported */ + if (unwrap_priv_data(rqstp, &rqstp->rq_arg, + gc->gc_seq, rsci->mechctx)) + goto auth_err; + /* placeholders for length and seq. number: */ + svcdata->body_start = resv->iov_base + resv->iov_len; + svc_putnl(resv, 0); + svc_putnl(resv, 0); + break; default: goto auth_err; } @@ -1076,8 +1147,8 @@ out: return ret; } -static int -svcauth_gss_release(struct svc_rqst *rqstp) +static inline int +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc = &gsd->clcred; @@ -1085,73 +1156,151 @@ svcauth_gss_release(struct svc_rqst *rqstp) struct xdr_buf integ_buf; struct xdr_netobj mic; struct kvec *resv; - u32 *p; + __be32 *p; int integ_offset, integ_len; int stat = -EINVAL; + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; + integ_len = resbuf->len - integ_offset; + BUG_ON(integ_len % 4); + *p++ = htonl(integ_len); + *p++ = htonl(gc->gc_seq); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, + integ_len)) + BUG(); + if (resbuf->page_len == 0 + && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE + < PAGE_SIZE) { + BUG_ON(resbuf->tail[0].iov_len); + /* Use head for everything */ + resv = &resbuf->head[0]; + } else if (resbuf->tail[0].iov_base == NULL) { + if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) + goto out_err; + resbuf->tail[0].iov_base = resbuf->head[0].iov_base + + resbuf->head[0].iov_len; + resbuf->tail[0].iov_len = 0; + rqstp->rq_restailpage = 0; + resv = &resbuf->tail[0]; + } else { + resv = &resbuf->tail[0]; + } + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; + if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) + goto out_err; + svc_putnl(resv, mic.len); + memset(mic.data + mic.len, 0, + round_up_to_quad(mic.len) - mic.len); + resv->iov_len += XDR_QUADLEN(mic.len) << 2; + /* not strictly required: */ + resbuf->len += XDR_QUADLEN(mic.len) << 2; + BUG_ON(resv->iov_len > PAGE_SIZE); +out: + stat = 0; +out_err: + return stat; +} + +static inline int +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + struct page **inpages = NULL; + __be32 *p, *len; + int offset; + int pad; + + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + return 0; + } + p++; + len = p++; + offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; + *p++ = htonl(gc->gc_seq); + inpages = resbuf->pages; + /* XXX: Would be better to write some xdr helper functions for + * nfs{2,3,4}xdr.c that place the data right, instead of copying: */ + if (resbuf->tail[0].iov_base && rqstp->rq_restailpage == 0) { + BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base + + PAGE_SIZE); + BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); + if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len + + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) + return -ENOMEM; + memmove(resbuf->tail[0].iov_base + RPC_MAX_AUTH_SIZE, + resbuf->tail[0].iov_base, + resbuf->tail[0].iov_len); + resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE; + } + if (resbuf->tail[0].iov_base == NULL) { + if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE) + return -ENOMEM; + resbuf->tail[0].iov_base = resbuf->head[0].iov_base + + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE; + resbuf->tail[0].iov_len = 0; + rqstp->rq_restailpage = 0; + } + if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages)) + return -ENOMEM; + *len = htonl(resbuf->len - offset); + pad = 3 - ((resbuf->len - offset - 1)&3); + p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + memset(p, 0, pad); + resbuf->tail[0].iov_len += pad; + resbuf->len += pad; + return 0; +} + +static int +svcauth_gss_release(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + int stat = -EINVAL; + if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ if (gsd->body_start == NULL) goto out; /* normally not set till svc_send, but we need it here: */ - resbuf->len = resbuf->head[0].iov_len - + resbuf->page_len + resbuf->tail[0].iov_len; + /* XXX: what for? Do we mess it up the moment we call svc_putu32 + * or whatever? */ + resbuf->len = total_buf_len(resbuf); switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: - p = gsd->body_start; - gsd->body_start = NULL; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* don't wrap in failure case: */ - /* Note: counting on not getting here if call was not even - * accepted! */ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; - goto out; - } - p++; - integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; - integ_len = resbuf->len - integ_offset; - BUG_ON(integ_len % 4); - *p++ = htonl(integ_len); - *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, - integ_len)) - BUG(); - if (resbuf->page_len == 0 - && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE - < PAGE_SIZE) { - BUG_ON(resbuf->tail[0].iov_len); - /* Use head for everything */ - resv = &resbuf->head[0]; - } else if (resbuf->tail[0].iov_base == NULL) { - if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE - > PAGE_SIZE) - goto out_err; - resbuf->tail[0].iov_base = - resbuf->head[0].iov_base - + resbuf->head[0].iov_len; - resbuf->tail[0].iov_len = 0; - rqstp->rq_restailpage = 0; - resv = &resbuf->tail[0]; - } else { - resv = &resbuf->tail[0]; - } - mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) + stat = svcauth_gss_wrap_resp_integ(rqstp); + if (stat) goto out_err; - svc_putu32(resv, htonl(mic.len)); - memset(mic.data + mic.len, 0, - round_up_to_quad(mic.len) - mic.len); - resv->iov_len += XDR_QUADLEN(mic.len) << 2; - /* not strictly required: */ - resbuf->len += XDR_QUADLEN(mic.len) << 2; - BUG_ON(resv->iov_len > PAGE_SIZE); break; case RPC_GSS_SVC_PRIVACY: + stat = svcauth_gss_wrap_resp_priv(rqstp); + if (stat) + goto out_err; + break; default: goto out_err; } diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f56767aaa927..3be257dc32b2 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -60,8 +60,8 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) /* * Marshal credential. */ -static u32 * -nul_marshal(struct rpc_task *task, u32 *p) +static __be32 * +nul_marshal(struct rpc_task *task, __be32 *p) { *p++ = htonl(RPC_AUTH_NULL); *p++ = 0; @@ -81,8 +81,8 @@ nul_refresh(struct rpc_task *task) return 0; } -static u32 * -nul_validate(struct rpc_task *task, u32 *p) +static __be32 * +nul_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; @@ -118,6 +118,8 @@ struct rpc_auth null_auth = { .au_cslack = 4, .au_rslack = 2, .au_ops = &authnull_ops, + .au_flavor = RPC_AUTH_NULL, + .au_count = ATOMIC_INIT(0), }; static diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index df14b6bfbf10..f7f990c9afe2 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -137,12 +137,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ -static u32 * -unx_marshal(struct rpc_task *task, u32 *p) +static __be32 * +unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; - u32 *base, *hold; + __be32 *base, *hold; int i; *p++ = htonl(RPC_AUTH_UNIX); @@ -178,8 +178,8 @@ unx_refresh(struct rpc_task *task) return 0; } -static u32 * -unx_validate(struct rpc_task *task, u32 *p) +static __be32 * +unx_validate(struct rpc_task *task, __be32 *p) { rpc_authflavor_t flavor; u32 size; @@ -225,6 +225,7 @@ struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, + .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), .au_credcache = &unix_cred_cache, }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7026b0866b7b..00cb388ece03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -71,7 +71,12 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, new = detail->alloc(); if (!new) return NULL; + /* must fully initialise 'new', else + * we might get lose if we need to + * cache_put it soon. + */ cache_init(new); + detail->init(new, key); write_lock(&detail->hash_lock); @@ -85,7 +90,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return tmp; } } - detail->init(new, key); new->next = *head; *head = new; detail->entries++; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index aa8965e9d307..124ff0ceb55b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -60,8 +60,8 @@ static void call_refreshresult(struct rpc_task *task); static void call_timeout(struct rpc_task *task); static void call_connect(struct rpc_task *task); static void call_connect_status(struct rpc_task *task); -static u32 * call_header(struct rpc_task *task); -static u32 * call_verify(struct rpc_task *task); +static __be32 * call_header(struct rpc_task *task); +static __be32 * call_verify(struct rpc_task *task); static int @@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) } } -/* - * Create an RPC client - * FIXME: This should also take a flags argument (as in task->tk_flags). - * It's called (among others) from pmap_create_client, which may in - * turn be called by an async task. In this case, rpciod should not be - * made to sleep too long. - */ -struct rpc_clnt * -rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *program, u32 vers, - rpc_authflavor_t flavor) +static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { struct rpc_version *version; struct rpc_clnt *clnt = NULL; @@ -125,10 +115,9 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, goto out_err; err = -ENOMEM; - clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); + clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) goto out_err; - memset(clnt, 0, sizeof(*clnt)); atomic_set(&clnt->cl_users, 0); atomic_set(&clnt->cl_count, 1); clnt->cl_parent = clnt; @@ -148,16 +137,12 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_pmap = &clnt->cl_pmap_default; - clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; - clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); - rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); - if (!clnt->cl_port) + if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; clnt->cl_rtt = &clnt->cl_rtt_default; @@ -184,8 +169,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_pathname); - dput(clnt->cl_dentry); + rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -193,40 +177,71 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: - xprt_destroy(xprt); + xprt_put(xprt); out_no_xprt: return ERR_PTR(err); } -/** - * Create an RPC client - * @xprt - pointer to xprt struct - * @servname - name of server - * @info - rpc_program - * @version - rpc_program version - * @authflavor - rpc_auth flavour to use +/* + * rpc_create - create an RPC client and transport with one call + * @args: rpc_clnt create argument structure * - * Creates an RPC client structure, then pings the server in order to - * determine if it is up, and if it supports this program and version. + * Creates and initializes an RPC transport and an RPC client. * - * This function should never be called by asynchronous tasks such as - * the portmapper. + * It can ping the server in order to determine if it is up, and to see if + * it supports this program and version. RPC_CLNT_CREATE_NOPING disables + * this behavior so asynchronous tasks can also use rpc_create. */ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) +struct rpc_clnt *rpc_create(struct rpc_create_args *args) { + struct rpc_xprt *xprt; struct rpc_clnt *clnt; - int err; - - clnt = rpc_new_client(xprt, servname, info, version, authflavor); + + xprt = xprt_create_transport(args->protocol, args->address, + args->addrsize, args->timeout); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + + /* + * By default, kernel RPC client connects from a reserved port. + * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, + * but it is always enabled for rpciod, which handles the connect + * operation. + */ + xprt->resvport = 1; + if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) + xprt->resvport = 0; + + dprintk("RPC: creating %s client for %s (xprt %p)\n", + args->program->name, args->servername, xprt); + + clnt = rpc_new_client(xprt, args->servername, args->program, + args->version, args->authflavor); if (IS_ERR(clnt)) return clnt; - err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); - if (err == 0) - return clnt; - rpc_shutdown_client(clnt); - return ERR_PTR(err); + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_INTR) + clnt->cl_intr = 1; + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_ONESHOT) + clnt->cl_oneshot = 1; + + return clnt; } +EXPORT_SYMBOL_GPL(rpc_create); /* * This function clones the RPC client structure. It allows us to share the @@ -246,21 +261,17 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); - /* Duplicate portmapper */ - rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); + new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; - if (!IS_ERR(new->cl_dentry)) { + if (!IS_ERR(new->cl_dentry)) dget(new->cl_dentry); - rpc_get_mount(); - } rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - new->cl_pmap = &new->cl_pmap_default; - new->cl_metrics = rpc_alloc_iostats(clnt); + new->cl_metrics = rpc_alloc_iostats(clnt); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); @@ -318,24 +329,21 @@ rpc_destroy_client(struct rpc_clnt *clnt) clnt->cl_auth = NULL; } if (clnt->cl_parent != clnt) { + if (!IS_ERR(clnt->cl_dentry)) + dput(clnt->cl_dentry); rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_pathname[0]) - rpc_rmdir(clnt->cl_pathname); - if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); - clnt->cl_xprt = NULL; + if (!IS_ERR(clnt->cl_dentry)) { + rpc_rmdir(clnt->cl_dentry); + rpc_put_mount(); } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - if (!IS_ERR(clnt->cl_dentry)) { - dput(clnt->cl_dentry); - rpc_put_mount(); - } + xprt_put(clnt->cl_xprt); kfree(clnt); return 0; } @@ -544,6 +552,40 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) task->tk_action = rpc_exit_task; } +/** + * rpc_peeraddr - extract remote peer address from clnt's xprt + * @clnt: RPC client structure + * @buf: target buffer + * @size: length of target buffer + * + * Returns the number of bytes that are actually in the stored address. + */ +size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) +{ + size_t bytes; + struct rpc_xprt *xprt = clnt->cl_xprt; + + bytes = sizeof(xprt->addr); + if (bytes > bufsize) + bytes = bufsize; + memcpy(buf, &clnt->cl_xprt->addr, bytes); + return xprt->addrlen; +} +EXPORT_SYMBOL_GPL(rpc_peeraddr); + +/** + * rpc_peeraddr2str - return remote peer address in printable format + * @clnt: RPC client structure + * @format: address format + * + */ +char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + return xprt->ops->print_addr(xprt, format); +} +EXPORT_SYMBOL_GPL(rpc_peeraddr2str); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { @@ -564,7 +606,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) { return clnt->cl_xprt->max_payload; } -EXPORT_SYMBOL(rpc_max_payload); +EXPORT_SYMBOL_GPL(rpc_max_payload); /** * rpc_force_rebind - force transport to check that remote port is unchanged @@ -574,9 +616,9 @@ EXPORT_SYMBOL(rpc_max_payload); void rpc_force_rebind(struct rpc_clnt *clnt) { if (clnt->cl_autobind) - clnt->cl_port = 0; + xprt_clear_bound(clnt->cl_xprt); } -EXPORT_SYMBOL(rpc_force_rebind); +EXPORT_SYMBOL_GPL(rpc_force_rebind); /* * Restart an (async) RPC call. Usually called from within the @@ -740,7 +782,7 @@ call_encode(struct rpc_task *task) struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_encode (status %d)\n", task->tk_pid, task->tk_status); @@ -785,16 +827,16 @@ call_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_connect; - if (!clnt->cl_port) { + if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; - task->tk_timeout = task->tk_xprt->bind_timeout; - rpc_getport(task, clnt); + task->tk_timeout = xprt->bind_timeout; + xprt->ops->rpcbind(task); } } @@ -819,15 +861,11 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", task->tk_pid); rpc_delay(task, 3*HZ); - goto retry_bind; + goto retry_timeout; case -ETIMEDOUT: dprintk("RPC: %4d rpcbind request timed out\n", task->tk_pid); - if (RPC_IS_SOFT(task)) { - status = -EIO; - break; - } - goto retry_bind; + goto retry_timeout; case -EPFNOSUPPORT: dprintk("RPC: %4d remote rpcbind service unavailable\n", task->tk_pid); @@ -840,16 +878,13 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); status = -EIO; - break; } rpc_exit(task, status); return; -retry_bind: - task->tk_status = 0; - task->tk_action = call_bind; - return; +retry_timeout: + task->tk_action = call_timeout; } /* @@ -897,14 +932,16 @@ call_connect_status(struct rpc_task *task) switch (status) { case -ENOTCONN: - case -ETIMEDOUT: case -EAGAIN: task->tk_action = call_bind; - break; - default: - rpc_exit(task, -EIO); - break; + if (!RPC_IS_SOFT(task)) + return; + /* if soft mounted, test if we've timed out */ + case -ETIMEDOUT: + task->tk_action = call_timeout; + return; } + rpc_exit(task, -EIO); } /* @@ -922,26 +959,43 @@ call_transmit(struct rpc_task *task) task->tk_status = xprt_prepare_transmit(task); if (task->tk_status != 0) return; + task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { - task->tk_rqstp->rq_bytes_sent = 0; + BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) - goto out_nosend; + return; } - task->tk_action = call_transmit_status; xprt_transmit(task); if (task->tk_status < 0) return; - if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); - } - return; -out_nosend: - /* release socket write lock before attempting to handle error */ - xprt_abort_transmit(task); + /* + * On success, ensure that we call xprt_end_transmit() before sleeping + * in order to allow access to the socket to other RPC requests. + */ + call_transmit_status(task); + if (task->tk_msg.rpc_proc->p_decode != NULL) + return; + task->tk_action = rpc_exit_task; + rpc_wake_up_task(task); +} + +/* + * 5a. Handle cleanup after a transmission + */ +static void +call_transmit_status(struct rpc_task *task) +{ + task->tk_action = call_status; + /* + * Special case: if we've been waiting on the socket's write_space() + * callback, then don't call xprt_end_transmit(). + */ + if (task->tk_status == -EAGAIN) + return; + xprt_end_transmit(task); rpc_task_force_reencode(task); } @@ -969,6 +1023,14 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + /* + * Delay any retries for 3 seconds, then handle as if it + * were a timeout. + */ + rpc_delay(task, 3*HZ); case -ETIMEDOUT: task->tk_action = call_timeout; break; @@ -988,23 +1050,11 @@ call_status(struct rpc_task *task) printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); - break; } } /* - * 6a. Handle transmission errors. - */ -static void -call_transmit_status(struct rpc_task *task) -{ - if (task->tk_status != -EAGAIN) - rpc_task_force_reencode(task); - call_status(task); -} - -/* - * 6b. Handle RPC timeout + * 6a. Handle RPC timeout * We do not release the request slot, so we keep using the * same XID for all retransmits. */ @@ -1050,7 +1100,7 @@ call_decode(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; - u32 *p; + __be32 *p; dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); @@ -1067,10 +1117,10 @@ call_decode(struct rpc_task *task) clnt->cl_stats->rpcretrans++; goto out_retry; } - printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", + dprintk("%s: too small RPC reply size (%d bytes)\n", clnt->cl_protname, task->tk_status); - rpc_exit(task, -EIO); - return; + task->tk_action = call_timeout; + goto out_retry; } /* @@ -1147,12 +1197,12 @@ call_refreshresult(struct rpc_task *task) /* * Call header serialization */ -static u32 * +static __be32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; - u32 *p = req->rq_svec[0].iov_base; + __be32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ @@ -1171,14 +1221,26 @@ call_header(struct rpc_task *task) /* * Reply header verification */ -static u32 * +static __be32 * call_verify(struct rpc_task *task) { struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; int len = task->tk_rqstp->rq_rcv_buf.len >> 2; - u32 *p = iov->iov_base, n; + __be32 *p = iov->iov_base; + u32 n; int error = -EACCES; + if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + printk(KERN_WARNING + "call_verify: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + goto out_eio; + } if ((len -= 3) < 0) goto out_overflow; p += 1; /* skip XID */ @@ -1242,7 +1304,7 @@ call_verify(struct rpc_task *task) printk(KERN_WARNING "call_verify: auth check failed\n"); goto out_garbage; /* bad verifier, retry */ } - len = p - (u32 *)iov->iov_base - 1; + len = p - (__be32 *)iov->iov_base - 1; if (len < 0) goto out_overflow; switch ((n = ntohl(*p++))) { @@ -1297,12 +1359,12 @@ out_overflow: goto out_garbage; } -static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj) { return 0; } -static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj) +static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj) { return 0; } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index d25b054ec921..919d5ba7ca0a 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -1,12 +1,13 @@ /* - * linux/net/sunrpc/pmap.c + * linux/net/sunrpc/pmap_clnt.c * - * Portmapper client. + * In-kernel RPC portmapper client. + * + * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). * * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> @@ -14,7 +15,6 @@ #include <linux/uio.h> #include <linux/in.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/sched.h> #ifdef RPC_DEBUG @@ -25,80 +25,141 @@ #define PMAP_UNSET 2 #define PMAP_GETPORT 3 +struct portmap_args { + u32 pm_prog; + u32 pm_vers; + u32 pm_prot; + unsigned short pm_port; + struct rpc_xprt * pm_xprt; +}; + static struct rpc_procinfo pmap_procedures[]; static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *); +static void pmap_getport_done(struct rpc_task *, void *); static struct rpc_program pmap_program; -static DEFINE_SPINLOCK(pmap_lock); -/* - * Obtain the port for a given RPC service on a given host. This one can - * be called for an ongoing RPC request. - */ -void -rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) +static void pmap_getport_prepare(struct rpc_task *task, void *calldata) { - struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; + struct portmap_args *map = calldata; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], .rpc_argp = map, - .rpc_resp = &clnt->cl_port, - .rpc_cred = NULL + .rpc_resp = &map->pm_port, }; + + rpc_call_setup(task, &msg, 0); +} + +static inline struct portmap_args *pmap_map_alloc(void) +{ + return kmalloc(sizeof(struct portmap_args), GFP_NOFS); +} + +static inline void pmap_map_free(struct portmap_args *map) +{ + kfree(map); +} + +static void pmap_map_release(void *data) +{ + pmap_map_free(data); +} + +static const struct rpc_call_ops pmap_getport_ops = { + .rpc_call_prepare = pmap_getport_prepare, + .rpc_call_done = pmap_getport_done, + .rpc_release = pmap_map_release, +}; + +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + +/** + * rpc_getport - obtain the port for a given RPC service on a given host + * @task: task that is waiting for portmapper request + * + * This one can be called for an ongoing RPC request, and can be used in + * an async (rpciod) context. + */ +void rpc_getport(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; + struct sockaddr_in addr; + struct portmap_args *map; struct rpc_clnt *pmap_clnt; - struct rpc_task *child; + struct rpc_task *child; + int status; - dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", + dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", task->tk_pid, clnt->cl_server, - map->pm_prog, map->pm_vers, map->pm_prot); + clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); - spin_lock(&pmap_lock); - if (map->pm_binding) { - rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); - spin_unlock(&pmap_lock); + if (xprt_test_and_set_binding(xprt)) { + task->tk_status = -EACCES; /* tell caller to check again */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); return; } - map->pm_binding = 1; - spin_unlock(&pmap_lock); - pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); - if (IS_ERR(pmap_clnt)) { - task->tk_status = PTR_ERR(pmap_clnt); + /* Someone else may have bound if we slept */ + status = 0; + if (xprt_bound(xprt)) + goto bailout_nofree; + + status = -ENOMEM; + map = pmap_map_alloc(); + if (!map) + goto bailout_nofree; + map->pm_prog = clnt->cl_prog; + map->pm_vers = clnt->cl_vers; + map->pm_prot = xprt->prot; + map->pm_port = 0; + map->pm_xprt = xprt_get(xprt); + + rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); + pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); + status = PTR_ERR(pmap_clnt); + if (IS_ERR(pmap_clnt)) goto bailout; - } - task->tk_status = 0; - /* - * Note: rpc_new_child will release client after a failure. - */ - if (!(child = rpc_new_child(pmap_clnt, task))) + status = -EIO; + child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); + if (IS_ERR(child)) goto bailout; + rpc_release_task(child); - /* Setup the call info struct */ - rpc_call_setup(child, &msg, 0); + rpc_sleep_on(&xprt->binding, task, NULL, NULL); - /* ... and run the child task */ task->tk_xprt->stat.bind_count++; - rpc_run_child(task, child, pmap_getport_done); return; bailout: - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); - rpc_exit(task, -EIO); + pmap_map_free(map); + xprt_put(xprt); +bailout_nofree: + task->tk_status = status; + pmap_wake_portmap_waiters(xprt, status); } #ifdef CONFIG_ROOT_NFS -int -rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) +/** + * rpc_getport_external - obtain the port for a given RPC service on a given host + * @sin: address of remote peer + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * + * This one is called from outside the RPC client in a synchronous task context. + */ +int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -113,7 +174,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) char hostname[32]; int status; - dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", + dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); @@ -133,45 +194,53 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) } #endif -static void -pmap_getport_done(struct rpc_task *task) +/* + * Portmapper child task invokes this callback via tk_exit. + */ +static void pmap_getport_done(struct rpc_task *child, void *data) { - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_portmap *map = clnt->cl_pmap; - - dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", - task->tk_pid, task->tk_status, clnt->cl_port); - - xprt->ops->set_port(xprt, 0); - if (task->tk_status < 0) { - /* Make the calling task exit with an error */ - task->tk_action = rpc_exit_task; - } else if (clnt->cl_port == 0) { - /* Program not registered */ - rpc_exit(task, -EACCES); + struct portmap_args *map = data; + struct rpc_xprt *xprt = map->pm_xprt; + int status = child->tk_status; + + if (status < 0) { + /* Portmapper not available */ + xprt->ops->set_port(xprt, 0); + } else if (map->pm_port == 0) { + /* Requested RPC service wasn't registered */ + xprt->ops->set_port(xprt, 0); + status = -EACCES; } else { - xprt->ops->set_port(xprt, clnt->cl_port); - clnt->cl_port = htons(clnt->cl_port); + /* Succeeded */ + xprt->ops->set_port(xprt, map->pm_port); + xprt_set_bound(xprt); + status = 0; } - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); + + dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", + child->tk_pid, status, map->pm_port); + + pmap_wake_portmap_waiters(xprt, status); + xprt_put(xprt); } -/* - * Set or unset a port registration with the local portmapper. +/** + * rpc_register - set or unset a port registration with the local portmapper + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * @port: port value to register + * @okay: result code + * * port == 0 means unregister, port != 0 means register. */ -int -rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -185,7 +254,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) struct rpc_clnt *pmap_clnt; int error = 0; - dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", + dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", prog, vers, prot, port); pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); @@ -208,38 +277,32 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -static struct rpc_clnt * -pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) +static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) { - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; - - /* printk("pmap: create xprt\n"); */ - xprt = xprt_create_proto(proto, srvaddr, NULL); - if (IS_ERR(xprt)) - return (struct rpc_clnt *)xprt; - xprt->ops->set_port(xprt, RPC_PMAP_PORT); + struct rpc_create_args args = { + .protocol = proto, + .address = (struct sockaddr *)srvaddr, + .addrsize = sizeof(*srvaddr), + .servername = hostname, + .program = &pmap_program, + .version = RPC_PMAP_VERSION, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_NOPING), + }; + + srvaddr->sin_port = htons(RPC_PMAP_PORT); if (!privileged) - xprt->resvport = 0; - - /* printk("pmap: create clnt\n"); */ - clnt = rpc_new_client(xprt, hostname, - &pmap_program, RPC_PMAP_VERSION, - RPC_AUTH_UNIX); - if (!IS_ERR(clnt)) { - clnt->cl_softrtry = 1; - clnt->cl_oneshot = 1; - } - return clnt; + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + return rpc_create(&args); } /* * XDR encode/decode functions for PMAP */ -static int -xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) +static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map) { - dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", + dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); *p++ = htonl(map->pm_prog); *p++ = htonl(map->pm_vers); @@ -250,15 +313,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) return 0; } -static int -xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) +static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); return 0; } -static int -xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) +static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); return 0; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 8241fa726803..700c6e061a04 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -8,7 +8,6 @@ * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> @@ -328,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v) seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); - seq_printf(m, "address: %u.%u.%u.%u\n", - NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); - seq_printf(m, "protocol: %s\n", - clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); return 0; } @@ -439,7 +436,7 @@ struct vfsmount *rpc_get_mount(void) { int err; - err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count); + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); if (err != 0) return ERR_PTR(err); return rpc_mount; @@ -491,7 +488,6 @@ rpc_get_inode(struct super_block *sb, int mode) return NULL; inode->i_mode = mode; inode->i_uid = inode->i_gid = 0; - inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { @@ -516,7 +512,7 @@ rpc_depopulate(struct dentry *parent) struct dentry *dentry, *dvec[10]; int n = 0; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { @@ -540,6 +536,7 @@ repeat: rpc_close_pipes(dentry->d_inode); simple_unlink(dir, dentry); } + inode_dir_notify(dir, DN_DELETE); dput(dentry); } while (n); goto repeat; @@ -611,8 +608,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - if (dentry->d_inode) - rpc_close_pipes(dentry->d_inode); + if (d_unhashed(dentry)) + return 0; if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -623,17 +620,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) } static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +rpc_lookup_create(struct dentry *parent, const char *name, int len) { + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dir = nd->dentry->d_inode; - mutex_lock(&dir->i_mutex); - dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; if (dentry->d_inode) { @@ -644,7 +637,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd) return dentry; out_err: mutex_unlock(&dir->i_mutex); - rpc_release_path(nd); + return dentry; +} + +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + int error; + + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + if (IS_ERR(dentry)) + rpc_release_path(nd); return dentry; } @@ -668,10 +674,11 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) RPCAUTH_info, RPCAUTH_EOF); if (error) goto err_depopulate; + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_depopulate: rpc_depopulate(dentry); __rpc_rmdir(dir, dentry); @@ -684,44 +691,35 @@ err_dput: } int -rpc_rmdir(char *path) +rpc_rmdir(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; int error; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); rpc_depopulate(dentry); error = __rpc_rmdir(dir, dentry); dput(dentry); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } struct dentry * -rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { - struct nameidata nd; struct dentry *dentry; struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_negative(path, &nd); + dentry = rpc_lookup_create(parent, name, strlen(name)); if (IS_ERR(dentry)) return dentry; - dir = nd.dentry->d_inode; - inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); + dir = parent->d_inode; + inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; inode->i_ino = iunique(dir->i_sb, 100); @@ -732,45 +730,40 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->flags = flags; rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); + dget(dentry); out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); - return dget(dentry); + return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); - printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, -ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", + __FILE__, __FUNCTION__, parent->d_name.name, name, + -ENOMEM); goto out; } int -rpc_unlink(char *path) +rpc_unlink(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; - int error; + int error = 0; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + if (!d_unhashed(dentry)) { + d_drop(dentry); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + } + inode_dir_notify(dir, DN_DELETE); } dput(dentry); - inode_dir_notify(dir, DN_DELETE); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } @@ -864,7 +857,6 @@ int register_rpc_pipefs(void) void unregister_rpc_pipefs(void) { - if (kmem_cache_destroy(rpc_inode_cachep)) - printk(KERN_WARNING "RPC: unable to free inode cache\n"); + kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5c3eee768504..a1ab4eed41f4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -21,7 +21,6 @@ #include <linux/mutex.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED @@ -45,12 +44,6 @@ static void rpciod_killall(void); static void rpc_async_schedule(void *); /* - * RPC tasks that create another task (e.g. for contacting the portmapper) - * will wait on this queue for their child's completion - */ -static RPC_WAITQ(childq, "childq"); - -/* * RPC tasks sit here while waiting for conditions to improve. */ static RPC_WAITQ(delay_queue, "delayq"); @@ -324,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task) } /* - * Place a newly initialized task on the workqueue. - */ -static inline void -rpc_schedule_run(struct rpc_task *task) -{ - rpc_set_active(task); - rpc_make_runnable(task); -} - -/* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. * NB: An RPC task will only receive interrupt-driven events as long @@ -559,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) spin_unlock_bh(&queue->lock); } +static void __rpc_atrun(struct rpc_task *task) +{ + rpc_wake_up_task(task); +} + /* * Run a task at a later time */ -static void __rpc_atrun(struct rpc_task *); -void -rpc_delay(struct rpc_task *task, unsigned long delay) +void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); } -static void -__rpc_atrun(struct rpc_task *task) -{ - task->tk_status = 0; - rpc_wake_up_task(task); -} - /* * Helper to call task->tk_ops->rpc_call_prepare */ @@ -933,72 +912,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, } EXPORT_SYMBOL(rpc_run_task); -/** - * rpc_find_parent - find the parent of a child task. - * @child: child task - * @parent: parent task - * - * Checks that the parent task is still sleeping on the - * queue 'childq'. If so returns a pointer to the parent. - * Upon failure returns NULL. - * - * Caller must hold childq.lock - */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) -{ - struct rpc_task *task; - struct list_head *le; - - task_for_each(task, le, &childq.tasks[0]) - if (task == parent) - return parent; - - return NULL; -} - -static void rpc_child_exit(struct rpc_task *child, void *calldata) -{ - struct rpc_task *parent; - - spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child, calldata)) != NULL) { - parent->tk_status = child->tk_status; - __rpc_wake_up_task(parent); - } - spin_unlock_bh(&childq.lock); -} - -static const struct rpc_call_ops rpc_child_ops = { - .rpc_call_done = rpc_child_exit, -}; - -/* - * Note: rpc_new_task releases the client after a failure. - */ -struct rpc_task * -rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) -{ - struct rpc_task *task; - - task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); - if (!task) - goto fail; - return task; - -fail: - parent->tk_status = -ENOMEM; - return NULL; -} - -void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) -{ - spin_lock_bh(&childq.lock); - /* N.B. Is it possible for the child to have already finished? */ - __rpc_sleep_on(&childq, task, func, NULL); - rpc_schedule_run(child); - spin_unlock_bh(&childq.lock); -} - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -1146,10 +1059,10 @@ rpc_destroy_mempool(void) mempool_destroy(rpc_buffer_mempool); if (rpc_task_mempool) mempool_destroy(rpc_task_mempool); - if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp)) - printk(KERN_INFO "rpc_task: not all structures were freed\n"); - if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp)) - printk(KERN_INFO "rpc_buffers: not all structures were freed\n"); + if (rpc_task_slabp) + kmem_cache_destroy(rpc_task_slabp); + if (rpc_buffer_slabp) + kmem_cache_destroy(rpc_buffer_slabp); } int diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index eb330d4f66d6..6f17527b9e69 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 15c2db26767b..bd98124c3a64 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -114,13 +114,8 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - unsigned int ops = clnt->cl_maxproc; - size_t size = ops * sizeof(struct rpc_iostats); struct rpc_iostats *new; - - new = kmalloc(size, GFP_KERNEL); - if (new) - memset(new, 0 , size); + new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); return new; } EXPORT_SYMBOL(rpc_alloc_iostats); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 769114f0f886..26c0531d7e25 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -6,7 +6,6 @@ * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -37,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ -EXPORT_SYMBOL(rpc_create_client); -EXPORT_SYMBOL(rpc_new_client); EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); @@ -58,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); /* Client transport */ -EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b08419e1fc68..44b8d9d4c18a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -32,9 +32,8 @@ svc_create(struct svc_program *prog, unsigned int bufsize) int vers; unsigned int xdrsize; - if (!(serv = kmalloc(sizeof(*serv), GFP_KERNEL))) + if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; - memset(serv, 0, sizeof(*serv)); serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -159,11 +158,10 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) struct svc_rqst *rqstp; int error = -ENOMEM; - rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL); + rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); if (!rqstp) goto out; - memset(rqstp, 0, sizeof(*rqstp)); init_waitqueue_head(&rqstp->rq_wait); if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) @@ -258,11 +256,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) struct kvec * argv = &rqstp->rq_arg.head[0]; struct kvec * resv = &rqstp->rq_res.head[0]; kxdrproc_t xdr; - u32 *statp; - u32 dir, prog, vers, proc, - auth_stat, rpc_stat; + __be32 *statp; + u32 dir, prog, vers, proc; + __be32 auth_stat, rpc_stat; int auth_res; - u32 *accept_statp; + __be32 *accept_statp; rpc_stat = rpc_success; @@ -280,19 +278,22 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; + /* Will be turned off only in gss privacy case: */ + rqstp->rq_sendfile_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) - svc_putu32(resv, 0); + svc_putnl(resv, 0); rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = ntohl(svc_getu32(argv)); - vers = ntohl(svc_getu32(argv)); + dir = svc_getnl(argv); + vers = svc_getnl(argv); /* First words of reply: */ - svc_putu32(resv, xdr_one); /* REPLY */ + svc_putnl(resv, 1); /* REPLY */ if (dir != 0) /* direction != CALL */ goto err_bad_dir; @@ -302,11 +303,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Save position in case we later decide to reject: */ accept_statp = resv->iov_base + resv->iov_len; - svc_putu32(resv, xdr_zero); /* ACCEPT */ + svc_putnl(resv, 0); /* ACCEPT */ - rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */ - rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ - rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ + rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ + rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ + rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ progp = serv->sv_program; @@ -360,7 +361,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) /* Build the reply header. */ statp = resv->iov_base +resv->iov_len; - svc_putu32(resv, rpc_success); /* RPC_SUCCESS */ + svc_putnl(resv, RPC_SUCCESS); /* Bump per-procedure stats counter */ procp->pc_count++; @@ -438,10 +439,10 @@ err_bad_dir: err_bad_rpc: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */ - svc_putu32(resv, xdr_two); /* Only RPCv2 supported */ - svc_putu32(resv, xdr_two); + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 0); /* RPC_MISMATCH */ + svc_putnl(resv, 2); /* Only RPCv2 supported */ + svc_putnl(resv, 2); goto sendit; err_bad_auth: @@ -449,15 +450,15 @@ err_bad_auth: serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ xdr_ressize_check(rqstp, accept_statp); - svc_putu32(resv, xdr_one); /* REJECT */ - svc_putu32(resv, xdr_one); /* AUTH_ERROR */ - svc_putu32(resv, auth_stat); /* status */ + svc_putnl(resv, 1); /* REJECT */ + svc_putnl(resv, 1); /* AUTH_ERROR */ + svc_putnl(resv, ntohl(auth_stat)); /* status */ goto sendit; err_bad_prog: dprintk("svc: unknown program %d\n", prog); serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_unavail); + svc_putnl(resv, RPC_PROG_UNAVAIL); goto sendit; err_bad_vers: @@ -465,9 +466,9 @@ err_bad_vers: printk("svc: unknown version (%d)\n", vers); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_prog_mismatch); - svc_putu32(resv, htonl(progp->pg_lovers)); - svc_putu32(resv, htonl(progp->pg_hivers)); + svc_putnl(resv, RPC_PROG_MISMATCH); + svc_putnl(resv, progp->pg_lovers); + svc_putnl(resv, progp->pg_hivers); goto sendit; err_bad_proc: @@ -475,7 +476,7 @@ err_bad_proc: printk("svc: unknown procedure (%d)\n", proc); #endif serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_proc_unavail); + svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: @@ -485,6 +486,6 @@ err_garbage: rpc_stat = rpc_garbage_args; err_bad: serv->sv_stats->rpcbadfmt++; - svc_putu32(resv, rpc_stat); + svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5b28c6176806..8f2320aded5c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -35,14 +35,14 @@ static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = { }; int -svc_authenticate(struct svc_rqst *rqstp, u32 *authp) +svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) { rpc_authflavor_t flavor; struct auth_ops *aops; *authp = rpc_auth_ok; - flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0])); + flavor = svc_getnl(&rqstp->rq_arg.head[0]); dprintk("svc: svc_authenticate (%d)\n", flavor); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 7e5707e2d6b6..1020d54b01d0 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -145,7 +145,7 @@ static void ip_map_request(struct cache_detail *cd, { char text_addr[20]; struct ip_map *im = container_of(h, struct ip_map, h); - __u32 addr = im->m_addr.s_addr; + __be32 addr = im->m_addr.s_addr; snprintf(text_addr, 20, "%u.%u.%u.%u", ntohl(addr) >> 24 & 0xff, @@ -249,10 +249,10 @@ static int ip_map_show(struct seq_file *m, seq_printf(m, "%s %d.%d.%d.%d %s\n", im->m_class, - htonl(addr.s_addr) >> 24 & 0xff, - htonl(addr.s_addr) >> 16 & 0xff, - htonl(addr.s_addr) >> 8 & 0xff, - htonl(addr.s_addr) >> 0 & 0xff, + ntohl(addr.s_addr) >> 24 & 0xff, + ntohl(addr.s_addr) >> 16 & 0xff, + ntohl(addr.s_addr) >> 8 & 0xff, + ntohl(addr.s_addr) >> 0 & 0xff, dom ); return 0; @@ -410,7 +410,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) } static int -svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -427,7 +427,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) *authp = rpc_autherr_badcred; return SVC_DENIED; } - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { dprintk("svc: bad null verf\n"); *authp = rpc_autherr_badverf; return SVC_DENIED; @@ -441,8 +441,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) return SVC_DROP; /* kmalloc failure - client must retry */ /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; } @@ -472,7 +472,7 @@ struct auth_ops svcauth_null = { static int -svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) +svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -488,31 +488,31 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) svc_getu32(argv); /* length */ svc_getu32(argv); /* time stamp */ - slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */ + slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */ if (slen > 64 || (len -= (slen + 3)*4) < 0) goto badcred; - argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */ + argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */ - cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */ - slen = ntohl(svc_getu32(argv)); /* gids length */ + cred->cr_uid = svc_getnl(argv); /* uid */ + cred->cr_gid = svc_getnl(argv); /* gid */ + slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) return SVC_DROP; for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; } /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); + svc_putnl(resv, RPC_AUTH_NULL); + svc_putnl(resv, 0); return SVC_OK; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a27905a0ad27..5b0fe1b66a23 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); + len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; slen -= xdr->head[0].iov_len; @@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) while (pglen > 0) { if (slen == size) flags = 0; - result = sock->ops->sendpage(sock, *ppage, base, size, flags); + result = kernel_sendpage(sock, *ppage, base, size, flags); if (result > 0) len += result; if (result != size) @@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) } /* send tail */ if (xdr->tail[0].iov_len) { - result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], + result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); @@ -434,13 +434,10 @@ out: static int svc_recv_available(struct svc_sock *svsk) { - mm_segment_t oldfs; struct socket *sock = svsk->sk_sock; int avail, err; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); - set_fs(oldfs); + err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); return (err >= 0)? avail : err; } @@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) * at accept time. FIXME */ alen = sizeof(rqstp->rq_addr); - sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); + kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk) if (!sock) return; - err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); - if (err) { + clear_bit(SK_CONN, &svsk->sk_flags); + err = kernel_accept(sock, &newsock, O_NONBLOCK); + if (err < 0) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); - return; - } - - dprintk("svc: tcp_accept %p allocated\n", newsock); - newsock->ops = ops = sock->ops; - - clear_bit(SK_CONN, &svsk->sk_flags); - if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { - if (err != -EAGAIN && net_ratelimit()) + else if (err != -EAGAIN && net_ratelimit()) printk(KERN_WARNING "%s: accept failed (err %d)!\n", serv->sv_name, -err); - goto failed; /* aborted connection or whatever */ + return; } + set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); - err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); + err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", @@ -1040,7 +1030,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp) { struct xdr_buf *xbufp = &rqstp->rq_res; int sent; - u32 reclen; + __be32 reclen; /* Set up the first element of the reply kvec. * Any other kvecs that may be in use have been taken @@ -1322,11 +1312,10 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, struct sock *inet; dprintk("svc: svc_setup_socket %p\n", sock); - if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) { + if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { *errp = -ENOMEM; return NULL; } - memset(svsk, 0, sizeof(*svsk)); inet = sock->sk; @@ -1404,17 +1393,15 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) return error; - if (sin != NULL) { - if (type == SOCK_STREAM) - sock->sk->sk_reuse = 1; /* allow address reuse */ - error = sock->ops->bind(sock, (struct sockaddr *) sin, - sizeof(*sin)); - if (error < 0) - goto bummer; - } + if (type == SOCK_STREAM) + sock->sk->sk_reuse = 1; /* allow address reuse */ + error = kernel_bind(sock, (struct sockaddr *) sin, + sizeof(*sin)); + if (error < 0) + goto bummer; if (protocol == IPPROTO_TCP) { - if ((error = sock->ops->listen(sock, 64)) < 0) + if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1065904841fd..d89b048ad6bb 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -7,7 +7,6 @@ * impossible at the moment. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/linkage.h> #include <linux/ctype.h> diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index bcbdf6430d5c..8142fdb8a930 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -19,8 +19,6 @@ #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xprt.h> -#include <linux/sunrpc/timer.h> #define RPC_RTO_MAX (60*HZ) #define RPC_RTO_INIT (HZ/5) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ca4bfa57e116..9022eb8b37ed 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -18,8 +18,8 @@ /* * XDR functions for basic NFS types */ -u32 * -xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) +__be32 * +xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) { unsigned int quadlen = XDR_QUADLEN(obj->len); @@ -29,8 +29,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) return p + XDR_QUADLEN(obj->len); } -u32 * -xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) +__be32 * +xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; @@ -55,7 +55,7 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) * Returns the updated current XDR buffer position * */ -u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes) { if (likely(nbytes != 0)) { unsigned int quadlen = XDR_QUADLEN(nbytes); @@ -79,21 +79,21 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed); * * Returns the updated current XDR buffer position */ -u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes) +__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { *p++ = htonl(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL(xdr_encode_opaque); -u32 * -xdr_encode_string(u32 *p, const char *string) +__be32 * +xdr_encode_string(__be32 *p, const char *string) { return xdr_encode_array(p, string, strlen(string)); } -u32 * -xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) +__be32 * +xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) { unsigned int len; @@ -191,7 +191,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, do { /* Are any pointers crossing a page boundary? */ if (pgto_base == 0) { - flush_dcache_page(*pgto); pgto_base = PAGE_CACHE_SIZE; pgto--; } @@ -211,11 +210,11 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, vto = kmap_atomic(*pgto, KM_USER0); vfrom = kmap_atomic(*pgfrom, KM_USER1); memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + flush_dcache_page(*pgto); kunmap_atomic(vfrom, KM_USER1); kunmap_atomic(vto, KM_USER0); } while ((len -= copy) != 0); - flush_dcache_page(*pgto); } /* @@ -433,7 +432,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) * of the buffer length, and takes care of adjusting the kvec * length for us. */ -void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; @@ -441,8 +440,8 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; - xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len); - xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len); + xdr->p = (__be32 *)((char *)iov->iov_base + iov->iov_len); + xdr->end = (__be32 *)((char *)iov->iov_base + scratch_len); BUG_ON(iov->iov_len > scratch_len); if (p != xdr->p && p != NULL) { @@ -466,10 +465,10 @@ EXPORT_SYMBOL(xdr_init_encode); * bytes of data. If so, update the total xdr_buf length, and * adjust the length of the current kvec. */ -uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q; + __be32 *p = xdr->p; + __be32 *q; /* align nbytes on the next 32-bit boundary */ nbytes += 3; @@ -525,7 +524,7 @@ EXPORT_SYMBOL(xdr_write_pages); * @buf: pointer to XDR buffer from which to decode data * @p: current pointer inside XDR buffer */ -void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) +void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { struct kvec *iov = buf->head; unsigned int len = iov->iov_len; @@ -535,7 +534,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) xdr->buf = buf; xdr->iov = iov; xdr->p = p; - xdr->end = (uint32_t *)((char *)iov->iov_base + len); + xdr->end = (__be32 *)((char *)iov->iov_base + len); } EXPORT_SYMBOL(xdr_init_decode); @@ -549,10 +548,10 @@ EXPORT_SYMBOL(xdr_init_decode); * If so return the current pointer, then update the current * pointer position. */ -uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) +__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { - uint32_t *p = xdr->p; - uint32_t *q = p + XDR_QUADLEN(nbytes); + __be32 *p = xdr->p; + __be32 *q = p + XDR_QUADLEN(nbytes); if (unlikely(q > xdr->end || q < p)) return NULL; @@ -568,8 +567,7 @@ EXPORT_SYMBOL(xdr_inline_decode); * * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" - * bytes is moved into the XDR tail[]. The current pointer is then - * repositioned at the beginning of the XDR tail. + * bytes is moved into the XDR tail[]. */ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { @@ -601,11 +599,36 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) * Position current pointer at beginning of tail, and * set remaining message length. */ - xdr->p = (uint32_t *)((char *)iov->iov_base + padding); - xdr->end = (uint32_t *)((char *)iov->iov_base + end); + xdr->p = (__be32 *)((char *)iov->iov_base + padding); + xdr->end = (__be32 *)((char *)iov->iov_base + end); } EXPORT_SYMBOL(xdr_read_pages); +/** + * xdr_enter_page - decode data from the XDR page + * @xdr: pointer to xdr_stream struct + * @len: number of bytes of page data + * + * Moves data beyond the current pointer position from the XDR head[] buffer + * into the page list. Any data that lies beyond current position + "len" + * bytes is moved into the XDR tail[]. The current pointer is then + * repositioned at the beginning of the first XDR page. + */ +void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) +{ + char * kaddr = page_address(xdr->buf->pages[0]); + xdr_read_pages(xdr, len); + /* + * Position current pointer at beginning of tail, and + * set remaining message length. + */ + if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) + len = PAGE_CACHE_SIZE - xdr->buf->page_base; + xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); + xdr->end = (__be32 *)((char *)xdr->p + len); +} +EXPORT_SYMBOL(xdr_enter_page); + static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void @@ -720,7 +743,7 @@ out: int xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { - u32 raw; + __be32 raw; int status; status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); @@ -733,7 +756,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) int xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) { - u32 raw = htonl(obj); + __be32 raw = htonl(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4dd5b3cfe754..80857470dc11 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -41,7 +41,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/workqueue.h> -#include <linux/random.h> +#include <linux/net.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/metrics.h> @@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (!xprt->addr.sin_port) { + if (!xprt_bound(xprt)) { task->tk_status = -EIO; return; } @@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task) task->tk_pid, -task->tk_status, task->tk_client->cl_server); xprt_release_write(xprt, task); task->tk_status = -EIO; - return; - } - - /* if soft mounted, just cause this RPC to fail */ - if (RPC_IS_SOFT(task)) { - xprt_release_write(xprt, task); - task->tk_status = -EIO; } } @@ -601,7 +594,7 @@ static void xprt_connect_status(struct rpc_task *task) * @xid: RPC XID of incoming reply * */ -struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) +struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) { struct list_head *pos; @@ -707,12 +700,9 @@ out_unlock: return err; } -void -xprt_abort_transmit(struct rpc_task *task) +void xprt_end_transmit(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; - - xprt_release_write(xprt, task); + xprt_release_write(task->tk_xprt, task); } /** @@ -761,8 +751,6 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = -ENOTCONN; else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - - xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return; } @@ -772,18 +760,8 @@ void xprt_transmit(struct rpc_task *task) * schedq, and being picked up by a parallel run of rpciod(). */ task->tk_status = status; - - switch (status) { - case -ECONNREFUSED: + if (status == -ECONNREFUSED) rpc_sleep_on(&xprt->sending, task, NULL, NULL); - case -EAGAIN: - case -ENOTCONN: - return; - default: - break; - } - xprt_release_write(xprt, task); - return; } static inline void do_xprt_reserve(struct rpc_task *task) @@ -823,14 +801,14 @@ void xprt_reserve(struct rpc_task *task) spin_unlock(&xprt->reserve_lock); } -static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) +static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { return xprt->xid++; } static inline void xprt_init_xid(struct rpc_xprt *xprt) { - get_random_bytes(&xprt->xid, sizeof(xprt->xid)); + xprt->xid = net_random(); } static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) @@ -844,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; + xprt_reset_majortimeo(req); dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } @@ -902,17 +881,32 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i to->to_exponential = 0; } -static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) +/** + * xprt_create_transport - create an RPC transport + * @proto: requested transport protocol + * @ap: remote peer address + * @size: length of address + * @to: timeout parameters + * + */ +struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) { int result; struct rpc_xprt *xprt; struct rpc_rqst *req; - if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) + if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { + dprintk("RPC: xprt_create_transport: no memory\n"); return ERR_PTR(-ENOMEM); - memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ - - xprt->addr = *ap; + } + if (size <= sizeof(xprt->addr)) { + memcpy(&xprt->addr, ap, size); + xprt->addrlen = size; + } else { + kfree(xprt); + dprintk("RPC: xprt_create_transport: address too large\n"); + return ERR_PTR(-EBADF); + } switch (proto) { case IPPROTO_UDP: @@ -924,14 +918,15 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", proto); - result = -EIO; - break; + return ERR_PTR(-EIO); } if (result) { kfree(xprt); + dprintk("RPC: xprt_create_transport: failed, %d\n", result); return ERR_PTR(result); } + kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -944,6 +939,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); rpc_init_wait_queue(&xprt->resend, "xprt_resend"); @@ -957,41 +953,43 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); - - return xprt; -} -/** - * xprt_create_proto - create an RPC client transport - * @proto: requested transport protocol - * @sap: remote peer's address - * @to: timeout parameters for new transport - * - */ -struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) -{ - struct rpc_xprt *xprt; - - xprt = xprt_setup(proto, sap, to); - if (IS_ERR(xprt)) - dprintk("RPC: xprt_create_proto failed\n"); - else - dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); return xprt; } /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @xprt: transport to destroy + * @kref: kref for the transport to destroy * */ -int xprt_destroy(struct rpc_xprt *xprt) +static void xprt_destroy(struct kref *kref) { + struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); + dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); +} - return 0; +/** + * xprt_put - release a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +void xprt_put(struct rpc_xprt *xprt) +{ + kref_put(&xprt->kref, xprt_destroy); +} + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + kref_get(&xprt->kref); + return xprt; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4b4e7dfdff14..28100e019225 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif +static void xs_format_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) { + snprintf(buf, 20, "%u.%u.%u.%u", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + if (xprt->prot == IPPROTO_UDP) + xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; + else + xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) { + snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; +} + +static void xs_free_peer_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) @@ -174,7 +215,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; int err, ret = 0; - ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); if (unlikely(!sock)) return -ENOTCONN; @@ -207,7 +247,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a base &= ~PAGE_CACHE_MASK; } - sendpage = sock->ops->sendpage ? : sock_no_sendpage; do { int flags = XS_SENDMSG_FLAGS; @@ -220,10 +259,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a if (pglen != len || xdr->tail[0].iov_len != 0) flags |= MSG_MORE; - /* Hmm... We might be dealing with highmem pages */ - if (PageHighMem(*ppage)) - sendpage = sock_no_sendpage; - err = sendpage(sock, *ppage, base, len, flags); + err = kernel_sendpage(sock, *ppage, base, len, flags); if (ret == 0) ret = err; else if (err > 0) @@ -300,7 +336,7 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), xdr, req->rq_bytes_sent); + xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -414,6 +450,33 @@ static int xs_tcp_send_request(struct rpc_task *task) } /** + * xs_tcp_release_xprt - clean up after a tcp transmission + * @xprt: transport + * @task: rpc task + * + * This cleans up if an error causes us to abort the transmission of a request. + * In this case, the socket may need to be reset in order to avoid confusing + * the server. + */ +static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (task != xprt->snd_task) + return; + if (task == NULL) + goto out_release; + req = task->tk_rqstp; + if (req->rq_bytes_sent == 0) + goto out_release; + if (req->rq_bytes_sent == req->rq_snd_buf.len) + goto out_release; + set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); +out_release: + xprt_release_xprt(xprt, task); +} + +/** * xs_close - close a socket * @xprt: transport * @@ -463,6 +526,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xprt_disconnect(xprt); xs_close(xprt); + xs_free_peer_addresses(xprt); kfree(xprt->slot); } @@ -484,7 +548,8 @@ static void xs_udp_data_ready(struct sock *sk, int len) struct rpc_rqst *rovr; struct sk_buff *skb; int err, repsize, copied; - u32 _xid, *xp; + u32 _xid; + __be32 *xp; read_lock(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); @@ -930,6 +995,26 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +static unsigned short xs_get_random_port(void) +{ + unsigned short range = xprt_max_resvport - xprt_min_resvport; + unsigned short rand = (unsigned short) net_random() % range; + return rand + xprt_min_resvport; +} + +/** + * xs_print_peer_address - format an IPv4 address for printing + * @xprt: generic transport + * @format: flags field indicating which parts of the address to render + */ +static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) +{ + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -938,8 +1023,11 @@ static void xs_udp_timer(struct rpc_task *task) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { + struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - xprt->addr.sin_port = htons(port); + + sap->sin_port = htons(port); } static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) @@ -952,7 +1040,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) do { myaddr.sin_port = htons(port); - err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { xprt->port = port; @@ -982,11 +1070,9 @@ static void xs_udp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); - /* Start by resetting any existing state */ xs_close(xprt); @@ -1000,6 +1086,9 @@ static void xs_udp_connect_worker(void *args) goto out; } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1047,7 +1136,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = sock->ops->connect(sock, &any, sizeof(any), 0); + result = kernel_connect(sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1065,11 +1154,9 @@ static void xs_tcp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); - if (!xprt->sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { @@ -1085,6 +1172,9 @@ static void xs_tcp_connect_worker(void *args) /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1117,8 +1207,8 @@ static void xs_tcp_connect_worker(void *args) /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); + status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, + xprt->addrlen, O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { @@ -1226,8 +1316,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1242,8 +1334,10 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, - .release_xprt = xprt_release_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1264,20 +1358,20 @@ static struct rpc_xprt_ops xs_tcp_ops = { int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - - dprintk("RPC: setting up udp-ipv4 transport...\n"); + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); - xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); if (xprt->slot == NULL) return -ENOMEM; - memset(xprt->slot, 0, slot_table_size); + + if (ntohs(addr->sin_port != 0)) + xprt_set_bound(xprt); + xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_UDP; - xprt->port = xprt_max_resvport; xprt->tsh_size = 0; - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -1294,6 +1388,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } @@ -1306,20 +1404,20 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - - dprintk("RPC: setting up tcp-ipv4 transport...\n"); + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); - xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); if (xprt->slot == NULL) return -ENOMEM; - memset(xprt->slot, 0, slot_table_size); + + if (ntohs(addr->sin_port) != 0) + xprt_set_bound(xprt); + xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_TCP; - xprt->port = xprt_max_resvport; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); @@ -1335,5 +1433,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 58a1b6b42ddd..cd4eafbab1b8 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -12,7 +12,6 @@ * */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2c4ecbe50082..1bb75703f384 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -49,13 +49,19 @@ #include "name_table.h" #include "bcast.h" - #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ #define BCLINK_LOG_BUF_SIZE 0 +/* + * Loss rate for incoming broadcast frames; used to test retransmission code. + * Set to N to cause every N'th frame to be discarded; 0 => don't discard any. + */ + +#define TIPC_BCAST_LOSS_RATE 0 + /** * struct bcbearer_pair - a pair of bearers used by broadcast link * @primary: pointer to primary bearer @@ -75,7 +81,14 @@ struct bcbearer_pair { * @bearer: (non-standard) broadcast bearer structure * @media: (non-standard) broadcast media structure * @bpairs: array of bearer pairs - * @bpairs_temp: array of bearer pairs used during creation of "bpairs" + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bc_lock". */ struct bcbearer { @@ -83,6 +96,8 @@ struct bcbearer { struct media media; struct bcbearer_pair bpairs[MAX_BEARERS]; struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct node_map remains; + struct node_map remains_new; }; /** @@ -102,7 +117,7 @@ struct bclink { static struct bcbearer *bcbearer = NULL; static struct bclink *bclink = NULL; static struct link *bcl = NULL; -static spinlock_t bc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(bc_lock); char tipc_bclink_name[] = "multicast-link"; @@ -165,21 +180,18 @@ static int bclink_ack_allowed(u32 n) * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit * - * Called with 'node' locked, bc_lock unlocked + * Called with bc_lock locked */ static void bclink_retransmit_pkt(u32 after, u32 to) { struct sk_buff *buf; - spin_lock_bh(&bc_lock); buf = bcl->first_out; while (buf && less_eq(buf_seqno(buf), after)) { buf = buf->next; } - if (buf != NULL) - tipc_link_retransmit(bcl, buf, mod(to - after)); - spin_unlock_bh(&bc_lock); + tipc_link_retransmit(bcl, buf, mod(to - after)); } /** @@ -346,8 +358,10 @@ static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 g for (; buf; buf = buf->next) { u32 seqno = buf_seqno(buf); - if (mod(seqno - prev) != 1) + if (mod(seqno - prev) != 1) { buf = NULL; + break; + } if (seqno == gap_after) break; prev = seqno; @@ -399,7 +413,10 @@ int tipc_bclink_send_msg(struct sk_buff *buf) */ void tipc_bclink_recv_pkt(struct sk_buff *buf) -{ +{ +#if (TIPC_BCAST_LOSS_RATE) + static int rx_count = 0; +#endif struct tipc_msg *msg = buf_msg(buf); struct node* node = tipc_node_find(msg_prevnode(msg)); u32 next_in; @@ -420,9 +437,13 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) tipc_node_lock(node); tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); + spin_lock_bh(&bc_lock); bcl->stats.recv_nacks++; + bcl->owner->next = node; /* remember requestor */ bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); + bcl->owner->next = NULL; + spin_unlock_bh(&bc_lock); } else { tipc_bclink_peek_nack(msg_destnode(msg), msg_bcast_tag(msg), @@ -433,6 +454,14 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) return; } +#if (TIPC_BCAST_LOSS_RATE) + if (++rx_count == TIPC_BCAST_LOSS_RATE) { + rx_count = 0; + buf_discard(buf); + return; + } +#endif + tipc_node_lock(node); receive: deferred = node->bclink.deferred_head; @@ -531,12 +560,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { static int send_count = 0; - struct node_map *remains; - struct node_map *remains_new; - struct node_map *remains_tmp; int bp_index; int swap_time; - int err; /* Prepare buffer for broadcasting (if first time trying to send it) */ @@ -557,9 +582,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, /* Send buffer over bearers until all targets reached */ - remains = kmalloc(sizeof(struct node_map), GFP_ATOMIC); - remains_new = kmalloc(sizeof(struct node_map), GFP_ATOMIC); - *remains = tipc_cltr_bcast_nodes; + bcbearer->remains = tipc_cltr_bcast_nodes; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct bearer *p = bcbearer->bpairs[bp_index].primary; @@ -568,8 +591,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (!p) break; /* no more bearers to try */ - tipc_nmap_diff(remains, &p->nodes, remains_new); - if (remains_new->count == remains->count) + tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); + if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ if (!p->publ.blocked && @@ -587,27 +610,17 @@ swap: bcbearer->bpairs[bp_index].primary = s; bcbearer->bpairs[bp_index].secondary = p; update: - if (remains_new->count == 0) { - err = TIPC_OK; - goto out; - } + if (bcbearer->remains_new.count == 0) + return TIPC_OK; - /* swap map */ - remains_tmp = remains; - remains = remains_new; - remains_new = remains_tmp; + bcbearer->remains = bcbearer->remains_new; } /* Unable to reach all targets */ bcbearer->bearer.publ.blocked = 1; bcl->stats.bearer_congs++; - err = ~TIPC_OK; - - out: - kfree(remains_new); - kfree(remains); - return err; + return ~TIPC_OK; } /** @@ -765,7 +778,7 @@ int tipc_bclink_init(void) bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC); if (!bcbearer || !bclink) { nomem: - warn("Memory squeeze; Failed to create multicast link\n"); + warn("Multicast link creation failed, no memory\n"); kfree(bcbearer); bcbearer = NULL; kfree(bclink); @@ -783,7 +796,7 @@ int tipc_bclink_init(void) memset(bclink, 0, sizeof(struct bclink)); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; - bclink->node.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&bclink->node.lock); bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 0e3be2ab3307..b243d9d495f0 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -180,7 +180,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port) if (!item->next) { item->next = kmalloc(sizeof(*item), GFP_ATOMIC); if (!item->next) { - warn("Memory squeeze: multicast destination port list is incomplete\n"); + warn("Incomplete multicast delivery, no memory\n"); return; } item->next->next = NULL; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e213a8e54855..75a5968c2139 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -112,39 +112,42 @@ int tipc_register_media(u32 media_type, goto exit; if (!media_name_valid(name)) { - warn("Media registration error: illegal name <%s>\n", name); + warn("Media <%s> rejected, illegal name\n", name); goto exit; } if (!bcast_addr) { - warn("Media registration error: no broadcast address supplied\n"); + warn("Media <%s> rejected, no broadcast address\n", name); goto exit; } if ((bearer_priority < TIPC_MIN_LINK_PRI) && (bearer_priority > TIPC_MAX_LINK_PRI)) { - warn("Media registration error: priority %u\n", bearer_priority); + warn("Media <%s> rejected, illegal priority (%u)\n", name, + bearer_priority); goto exit; } if ((link_tolerance < TIPC_MIN_LINK_TOL) || (link_tolerance > TIPC_MAX_LINK_TOL)) { - warn("Media registration error: tolerance %u\n", link_tolerance); + warn("Media <%s> rejected, illegal tolerance (%u)\n", name, + link_tolerance); goto exit; } media_id = media_count++; if (media_id >= MAX_MEDIA) { - warn("Attempt to register more than %u media\n", MAX_MEDIA); + warn("Media <%s> rejected, media limit reached (%u)\n", name, + MAX_MEDIA); media_count--; goto exit; } for (i = 0; i < media_id; i++) { if (media_list[i].type_id == media_type) { - warn("Attempt to register second media with type %u\n", + warn("Media <%s> rejected, duplicate type (%u)\n", name, media_type); media_count--; goto exit; } if (!strcmp(name, media_list[i].name)) { - warn("Attempt to re-register media name <%s>\n", name); + warn("Media <%s> rejected, duplicate name\n", name); media_count--; goto exit; } @@ -283,6 +286,9 @@ static struct bearer *bearer_find(const char *name) struct bearer *b_ptr; u32 i; + if (tipc_mode != TIPC_NET_MODE) + return NULL; + for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) return b_ptr; @@ -475,26 +481,33 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) u32 i; int res = -EINVAL; - if (tipc_mode != TIPC_NET_MODE) + if (tipc_mode != TIPC_NET_MODE) { + warn("Bearer <%s> rejected, not supported in standalone mode\n", + name); return -ENOPROTOOPT; - - if (!bearer_name_validate(name, &b_name) || - !tipc_addr_domain_valid(bcast_scope) || - !in_scope(bcast_scope, tipc_own_addr)) + } + if (!bearer_name_validate(name, &b_name)) { + warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; - + } + if (!tipc_addr_domain_valid(bcast_scope) || + !in_scope(bcast_scope, tipc_own_addr)) { + warn("Bearer <%s> rejected, illegal broadcast scope\n", name); + return -EINVAL; + } if ((priority < TIPC_MIN_LINK_PRI || priority > TIPC_MAX_LINK_PRI) && - (priority != TIPC_MEDIA_LINK_PRI)) + (priority != TIPC_MEDIA_LINK_PRI)) { + warn("Bearer <%s> rejected, illegal priority\n", name); return -EINVAL; + } write_lock_bh(&tipc_net_lock); - if (!tipc_bearers) - goto failed; m_ptr = media_find(b_name.media_name); if (!m_ptr) { - warn("No media <%s>\n", b_name.media_name); + warn("Bearer <%s> rejected, media <%s> not registered\n", name, + b_name.media_name); goto failed; } @@ -510,23 +523,24 @@ restart: continue; } if (!strcmp(name, tipc_bearers[i].publ.name)) { - warn("Bearer <%s> already enabled\n", name); + warn("Bearer <%s> rejected, already enabled\n", name); goto failed; } if ((tipc_bearers[i].priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { - warn("Third bearer <%s> with priority %u, unable to lower to %u\n", - name, priority + 1, priority); + warn("Bearer <%s> rejected, duplicate priority\n", + name); goto failed; } - warn("Third bearer <%s> with priority %u, lowering to %u\n", + warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); goto restart; } } if (bearer_id >= MAX_BEARERS) { - warn("Attempt to enable more than %d bearers\n", MAX_BEARERS); + warn("Bearer <%s> rejected, bearer limit reached (%u)\n", + name, MAX_BEARERS); goto failed; } @@ -536,7 +550,7 @@ restart: strcpy(b_ptr->publ.name, name); res = m_ptr->enable_bearer(&b_ptr->publ); if (res) { - warn("Failed to enable bearer <%s>\n", name); + warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); goto failed; } @@ -552,7 +566,7 @@ restart: b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, bcast_scope, 2); } - b_ptr->publ.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&b_ptr->publ.lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, addr_string_fill(addr_string, bcast_scope), priority); @@ -573,9 +587,6 @@ int tipc_block_bearer(const char *name) struct link *l_ptr; struct link *temp_l_ptr; - if (tipc_mode != TIPC_NET_MODE) - return -ENOPROTOOPT; - read_lock_bh(&tipc_net_lock); b_ptr = bearer_find(name); if (!b_ptr) { @@ -584,6 +595,7 @@ int tipc_block_bearer(const char *name) return -EINVAL; } + info("Blocking bearer <%s>\n", name); spin_lock_bh(&b_ptr->publ.lock); b_ptr->publ.blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { @@ -595,7 +607,6 @@ int tipc_block_bearer(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); read_unlock_bh(&tipc_net_lock); - info("Blocked bearer <%s>\n", name); return TIPC_OK; } @@ -611,15 +622,13 @@ static int bearer_disable(const char *name) struct link *l_ptr; struct link *temp_l_ptr; - if (tipc_mode != TIPC_NET_MODE) - return -ENOPROTOOPT; - b_ptr = bearer_find(name); if (!b_ptr) { warn("Attempt to disable unknown bearer <%s>\n", name); return -EINVAL; } + info("Disabling bearer <%s>\n", name); tipc_disc_stop_link_req(b_ptr->link_req); spin_lock_bh(&b_ptr->publ.lock); b_ptr->link_req = NULL; @@ -635,7 +644,6 @@ static int bearer_disable(const char *name) tipc_link_delete(l_ptr); } spin_unlock_bh(&b_ptr->publ.lock); - info("Disabled bearer <%s>\n", name); memset(b_ptr, 0, sizeof(struct bearer)); return TIPC_OK; } @@ -657,11 +665,9 @@ int tipc_bearer_init(void) int res; write_lock_bh(&tipc_net_lock); - tipc_bearers = kmalloc(MAX_BEARERS * sizeof(struct bearer), GFP_ATOMIC); - media_list = kmalloc(MAX_MEDIA * sizeof(struct media), GFP_ATOMIC); + tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); + media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); if (tipc_bearers && media_list) { - memset(tipc_bearers, 0, MAX_BEARERS * sizeof(struct bearer)); - memset(media_list, 0, MAX_MEDIA * sizeof(struct media)); res = TIPC_OK; } else { kfree(tipc_bearers); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 1aed81584e96..b46b5188a9fd 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -57,43 +57,43 @@ struct cluster *tipc_cltr_create(u32 addr) struct _zone *z_ptr; struct cluster *c_ptr; int max_nodes; - int alloc; - c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC); - if (c_ptr == NULL) + c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC); + if (c_ptr == NULL) { + warn("Cluster creation failure, no memory\n"); return NULL; - memset(c_ptr, 0, sizeof(*c_ptr)); + } c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0); if (in_own_cluster(addr)) max_nodes = LOWEST_SLAVE + tipc_max_slaves; else max_nodes = tipc_max_nodes + 1; - alloc = sizeof(void *) * (max_nodes + 1); - c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC); + + c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC); if (c_ptr->nodes == NULL) { + warn("Cluster creation failure, no memory for node area\n"); kfree(c_ptr); return NULL; } - memset(c_ptr->nodes, 0, alloc); + if (in_own_cluster(addr)) tipc_local_nodes = c_ptr->nodes; c_ptr->highest_slave = LOWEST_SLAVE - 1; c_ptr->highest_node = 0; z_ptr = tipc_zone_find(tipc_zone(addr)); - if (z_ptr == NULL) { + if (!z_ptr) { z_ptr = tipc_zone_create(addr); } - if (z_ptr != NULL) { - tipc_zone_attach_cluster(z_ptr, c_ptr); - c_ptr->owner = z_ptr; - } - else { + if (!z_ptr) { + kfree(c_ptr->nodes); kfree(c_ptr); - c_ptr = NULL; + return NULL; } + tipc_zone_attach_cluster(z_ptr, c_ptr); + c_ptr->owner = z_ptr; return c_ptr; } diff --git a/net/tipc/config.c b/net/tipc/config.c index 48b5de2dbe60..285e1bc2d880 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -63,7 +63,7 @@ struct manager { static struct manager mng = { 0}; -static spinlock_t config_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(config_lock); static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -291,13 +291,22 @@ static struct sk_buff *cfg_set_own_addr(void) if (!tipc_addr_node_valid(addr)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (node address)"); - if (tipc_own_addr) + if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); + tipc_own_addr = addr; + + /* + * Must release all spinlocks before calling start_net() because + * Linux version of TIPC calls eth_media_start() which calls + * register_netdevice_notifier() which may block! + * + * Temporarily releasing the lock should be harmless for non-Linux TIPC, + * but Linux version of eth_media_start() should really be reworked + * so that it can be called with spinlocks held. + */ spin_unlock_bh(&config_lock); - tipc_core_stop_net(); - tipc_own_addr = addr; tipc_core_start_net(); spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); @@ -350,50 +359,21 @@ static struct sk_buff *cfg_set_max_subscriptions(void) static struct sk_buff *cfg_set_max_ports(void) { - int orig_mode; u32 value; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_ports) + return tipc_cfg_reply_none(); if (value != delimit(value, 127, 65535)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max ports must be 127-65535)"); - - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - - if (atomic_read(&tipc_user_count) > 2) + if (tipc_mode != TIPC_NOT_RUNNING) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC users exist)"); - - spin_unlock_bh(&config_lock); - orig_mode = tipc_get_mode(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_stop_net(); - tipc_core_stop(); + " (cannot change max ports while TIPC is active)"); tipc_max_ports = value; - tipc_core_start(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_start_net(); - spin_lock_bh(&config_lock); - return tipc_cfg_reply_none(); -} - -static struct sk_buff *set_net_max(int value, int *parameter) -{ - int orig_mode; - - if (value != *parameter) { - orig_mode = tipc_get_mode(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_stop_net(); - *parameter = value; - if (orig_mode == TIPC_NET_MODE) - tipc_core_start_net(); - } - return tipc_cfg_reply_none(); } @@ -405,10 +385,16 @@ static struct sk_buff *cfg_set_max_zones(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_zones) + return tipc_cfg_reply_none(); if (value != delimit(value, 1, 255)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max zones must be 1-255)"); - return set_net_max(value, &tipc_max_zones); + if (tipc_mode == TIPC_NET_MODE) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (cannot change max zones once TIPC has joined a network)"); + tipc_max_zones = value; + return tipc_cfg_reply_none(); } static struct sk_buff *cfg_set_max_clusters(void) @@ -419,8 +405,8 @@ static struct sk_buff *cfg_set_max_clusters(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); - if (value != 1) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + if (value != delimit(value, 1, 1)) + return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max clusters fixed at 1)"); return tipc_cfg_reply_none(); } @@ -433,10 +419,16 @@ static struct sk_buff *cfg_set_max_nodes(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_nodes) + return tipc_cfg_reply_none(); if (value != delimit(value, 8, 2047)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max nodes must be 8-2047)"); - return set_net_max(value, &tipc_max_nodes); + if (tipc_mode == TIPC_NET_MODE) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (cannot change max nodes once TIPC has joined a network)"); + tipc_max_nodes = value; + return tipc_cfg_reply_none(); } static struct sk_buff *cfg_set_max_slaves(void) @@ -461,15 +453,16 @@ static struct sk_buff *cfg_set_netid(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_net_id) + return tipc_cfg_reply_none(); if (value != delimit(value, 1, 9999)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network id must be 1-9999)"); - - if (tipc_own_addr) + if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once part of network)"); - - return set_net_max(value, &tipc_net_id); + " (cannot change network id once TIPC has joined a network)"); + tipc_net_id = value; + return tipc_cfg_reply_none(); } struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, @@ -649,7 +642,7 @@ static void cfg_named_msg_event(void *userdata, if ((size < sizeof(*req_hdr)) || (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { - warn("discarded invalid configuration message\n"); + warn("Invalid configuration message discarded\n"); return; } diff --git a/net/tipc/core.c b/net/tipc/core.c index 3d0a8ee4e1d3..0539a8362858 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,7 +57,7 @@ void tipc_socket_stop(void); int tipc_netlink_start(void); void tipc_netlink_stop(void); -#define MOD_NAME "tipc_start: " +#define TIPC_MOD_VER "1.6.1" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 @@ -191,14 +191,15 @@ static int __init tipc_init(void) int res; tipc_log_reinit(CONFIG_TIPC_LOG); - info("Activated (compiled " __DATE__ " " __TIME__ ")\n"); + info("Activated (version " TIPC_MOD_VER + " compiled " __DATE__ " " __TIME__ ")\n"); tipc_own_addr = 0; tipc_remote_management = 1; tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536); - tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 511); + tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255); tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1); tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047); tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047); @@ -224,6 +225,7 @@ module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(TIPC_MOD_VER); /* Native TIPC API for kernel-space applications (see tipc.h) */ diff --git a/net/tipc/core.h b/net/tipc/core.h index 1f2e8b27a13f..762aac2572be 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -111,10 +111,6 @@ void tipc_dump(struct print_buf*,const char *fmt, ...); #else -#ifndef DBG_OUTPUT -#define DBG_OUTPUT NULL -#endif - /* * TIPC debug support not included: * - system messages are printed to system console @@ -129,6 +125,19 @@ void tipc_dump(struct print_buf*,const char *fmt, ...); #define msg_dbg(msg,txt) do {} while (0) #define dump(fmt,arg...) do {} while (0) + +/* + * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is + * the null print buffer. Thes ensures that any system or debug messages + * that are generated without using the above macros are handled correctly. + */ + +#undef TIPC_OUTPUT +#define TIPC_OUTPUT TIPC_CONS + +#undef DBG_OUTPUT +#define DBG_OUTPUT NULL + #endif @@ -288,7 +297,10 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) * buf_acquire - creates a TIPC message buffer * @size: message size (including TIPC header) * - * Returns a new buffer. Space is reserved for a data link header. + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. */ static inline struct sk_buff *buf_acquire(u32 size) @@ -309,7 +321,7 @@ static inline struct sk_buff *buf_acquire(u32 size) * buf_discard - frees a TIPC message buffer * @skb: message buffer * - * Frees a new buffer. If passed NULL, just returns. + * Frees a message buffer. If passed NULL, just returns. */ static inline void buf_discard(struct sk_buff *skb) diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 26ef95d5fe38..55130655e1ed 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -41,7 +41,7 @@ #define MAX_STRING 512 static char print_string[MAX_STRING]; -static spinlock_t print_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(print_lock); static struct print_buf cons_buf = { NULL, 0, NULL, NULL }; struct print_buf *TIPC_CONS = &cons_buf; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 92601385e5f5..ee94de92ae99 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -2,7 +2,7 @@ * net/tipc/discover.c * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -176,7 +176,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf) n_ptr = tipc_node_create(orig); } if (n_ptr == NULL) { - warn("Memory squeeze; Failed to create node\n"); return; } spin_lock_bh(&n_ptr->lock); @@ -191,10 +190,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf) } addr = &link->media_addr; if (memcmp(addr, &media_addr, sizeof(*addr))) { - char addr_string[16]; - - warn("New bearer address for %s\n", - addr_string_fill(addr_string, orig)); + warn("Resetting link <%s>, peer interface address changed\n", + link->name); memcpy(addr, &media_addr, sizeof(*addr)); tipc_link_reset(link); } @@ -270,8 +267,8 @@ static void disc_timeout(struct link_req *req) /* leave timer interval "as is" if already at a "normal" rate */ } else { req->timer_intv *= 2; - if (req->timer_intv > TIPC_LINK_REQ_SLOW) - req->timer_intv = TIPC_LINK_REQ_SLOW; + if (req->timer_intv > TIPC_LINK_REQ_FAST) + req->timer_intv = TIPC_LINK_REQ_FAST; if ((req->timer_intv == TIPC_LINK_REQ_FAST) && (req->bearer->nodes.count)) req->timer_intv = TIPC_LINK_REQ_SLOW; @@ -298,7 +295,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, { struct link_req *req; - req = (struct link_req *)kmalloc(sizeof(*req), GFP_ATOMIC); + req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return NULL; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 7a252785f727..682da4a28041 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -2,7 +2,7 @@ * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -98,17 +98,19 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, u32 size; if (likely(eb_ptr->bearer)) { - size = msg_size((struct tipc_msg *)buf->data); - skb_trim(buf, size); - if (likely(buf->len == size)) { - buf->next = NULL; - tipc_recv_msg(buf, eb_ptr->bearer); - } else { - kfree_skb(buf); + if (likely(!dev->promiscuity) || + !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) || + !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) { + size = msg_size((struct tipc_msg *)buf->data); + skb_trim(buf, size); + if (likely(buf->len == size)) { + buf->next = NULL; + tipc_recv_msg(buf, eb_ptr->bearer); + return TIPC_OK; + } } - } else { - kfree_skb(buf); } + kfree_skb(buf); return TIPC_OK; } @@ -125,8 +127,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ - while (dev && dev->name && - (memcmp(dev->name, driver_name, strlen(dev->name)))) { + while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) { dev = dev->next; } if (!dev) @@ -252,7 +253,9 @@ int tipc_eth_media_start(void) if (eth_started) return -EINVAL; - memset(&bcast_addr, 0xff, sizeof(bcast_addr)); + bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH); + memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN); + memset(eth_bearers, 0, sizeof(eth_bearers)); res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth", diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 966f70a1b608..ae6ddf00a1aa 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -44,7 +44,7 @@ struct queue_item { static kmem_cache_t *tipc_queue_item_cache; static struct list_head signal_queue_head; -static spinlock_t qitem_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(qitem_lock); static int handler_enabled = 0; static void process_signal_queue(unsigned long dummy); diff --git a/net/tipc/link.c b/net/tipc/link.c index 784b24b6d102..693f02eca6d6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -417,12 +417,11 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, struct tipc_msg *msg; char *if_name; - l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC); + l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); if (!l_ptr) { - warn("Memory squeeze; Failed to create link\n"); + warn("Link creation failed, no memory\n"); return NULL; } - memset(l_ptr, 0, sizeof(*l_ptr)); l_ptr->addr = peer; if_name = strchr(b_ptr->publ.name, ':') + 1; @@ -469,7 +468,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, if (!pb) { kfree(l_ptr); - warn("Memory squeeze; Failed to create link\n"); + warn("Link creation failed, no memory for print buffer\n"); return NULL; } tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); @@ -574,7 +573,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all) break; list_del_init(&p_ptr->wait_list); p_ptr->congested_link = NULL; - assert(p_ptr->wakeup); spin_lock_bh(p_ptr->publ.lock); p_ptr->publ.congested = 0; p_ptr->wakeup(&p_ptr->publ); @@ -691,6 +689,7 @@ void tipc_link_reset(struct link *l_ptr) struct sk_buff *buf; u32 prev_state = l_ptr->state; u32 checkpoint = l_ptr->next_in_no; + int was_active_link = tipc_link_is_active(l_ptr); msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); @@ -712,7 +711,7 @@ void tipc_link_reset(struct link *l_ptr) tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name); dbg_link_dump(); #endif - if (tipc_node_has_active_links(l_ptr->owner) && + if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && l_ptr->owner->permit_changeover) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; @@ -755,7 +754,7 @@ void tipc_link_reset(struct link *l_ptr) static void link_activate(struct link *l_ptr) { - l_ptr->next_in_no = 1; + l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); link_send_event(tipc_cfg_link_event, l_ptr, 1); @@ -820,6 +819,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) break; case RESET_MSG: dbg_link("RES -> RR\n"); + info("Resetting link <%s>, requested by peer\n", + l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -844,6 +845,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) break; case RESET_MSG: dbg_link("RES -> RR\n"); + info("Resetting link <%s>, requested by peer " + "while probing\n", l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -875,6 +878,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) } else { /* Link has failed */ dbg_link("-> RU (%u probes unanswered)\n", l_ptr->fsm_msg_cnt); + warn("Resetting link <%s>, peer not responding\n", + l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -982,17 +987,20 @@ static int link_bundle_buf(struct link *l_ptr, struct tipc_msg *bundler_msg = buf_msg(bundler); struct tipc_msg *msg = buf_msg(buf); u32 size = msg_size(msg); - u32 to_pos = align(msg_size(bundler_msg)); - u32 rest = link_max_pkt(l_ptr) - to_pos; + u32 bundle_size = msg_size(bundler_msg); + u32 to_pos = align(bundle_size); + u32 pad = to_pos - bundle_size; if (msg_user(bundler_msg) != MSG_BUNDLER) return 0; if (msg_type(bundler_msg) != OPEN_MSG) return 0; - if (rest < align(size)) + if (skb_tailroom(bundler) < (pad + size)) + return 0; + if (link_max_pkt(l_ptr) < (to_pos + size)) return 0; - skb_put(bundler, (to_pos - msg_size(bundler_msg)) + size); + skb_put(bundler, pad + size); memcpy(bundler->data + to_pos, buf->data, size); msg_set_size(bundler_msg, to_pos + size); msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); @@ -1050,7 +1058,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) msg_dbg(msg, "TIPC: Congestion, throwing away\n"); buf_discard(buf); if (imp > CONN_MANAGER) { - warn("Resetting <%s>, send queue full", l_ptr->name); + warn("Resetting link <%s>, send queue full", l_ptr->name); tipc_link_reset(l_ptr); } return dsz; @@ -1135,9 +1143,13 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) if (n_ptr) { tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; - dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest); if (l_ptr) { + dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest); res = tipc_link_send_buf(l_ptr, buf); + } else { + dbg("Attempt to send msg to unreachable node:\n"); + msg_dbg(buf_msg(buf),">>>"); + buf_discard(buf); } tipc_node_unlock(n_ptr); } else { @@ -1242,8 +1254,6 @@ int tipc_link_send_sections_fast(struct port *sender, int res; u32 selector = msg_origport(hdr) & 1; - assert(destaddr != tipc_own_addr); - again: /* * Try building message using port's max_pkt hint. @@ -1604,40 +1614,121 @@ void tipc_link_push_queue(struct link *l_ptr) tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); } +static void link_reset_all(unsigned long addr) +{ + struct node *n_ptr; + char addr_string[16]; + u32 i; + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find((u32)addr); + if (!n_ptr) { + read_unlock_bh(&tipc_net_lock); + return; /* node no longer exists */ + } + + tipc_node_lock(n_ptr); + + warn("Resetting all links to %s\n", + addr_string_fill(addr_string, n_ptr->addr)); + + for (i = 0; i < MAX_BEARERS; i++) { + if (n_ptr->links[i]) { + link_print(n_ptr->links[i], TIPC_OUTPUT, + "Resetting link\n"); + tipc_link_reset(n_ptr->links[i]); + } + } + + tipc_node_unlock(n_ptr); + read_unlock_bh(&tipc_net_lock); +} + +static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + + warn("Retransmission failure on link <%s>\n", l_ptr->name); + tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>"); + + if (l_ptr->addr) { + + /* Handle failure on standard link */ + + link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n"); + tipc_link_reset(l_ptr); + + } else { + + /* Handle failure on broadcast link */ + + struct node *n_ptr; + char addr_string[16]; + + tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); + tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle); + + n_ptr = l_ptr->owner->next; + tipc_node_lock(n_ptr); + + addr_string_fill(addr_string, n_ptr->addr); + tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string); + tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported); + tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked); + tipc_printf(TIPC_OUTPUT, "Last in: %u, ", n_ptr->bclink.last_in); + tipc_printf(TIPC_OUTPUT, "Gap after: %u, ", n_ptr->bclink.gap_after); + tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to); + tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync); + + tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); + + tipc_node_unlock(n_ptr); + + l_ptr->stale_count = 0; + } +} + void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, u32 retransmits) { struct tipc_msg *msg; + if (!buf) + return; + + msg = buf_msg(buf); + dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); - if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && buf && !skb_cloned(buf)) { - msg_dbg(buf_msg(buf), ">NO_RETR->BCONG>"); - dbg_print_link(l_ptr, " "); - l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf)); - l_ptr->retransm_queue_size = retransmits; - return; + if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { + if (!skb_cloned(buf)) { + msg_dbg(msg, ">NO_RETR->BCONG>"); + dbg_print_link(l_ptr, " "); + l_ptr->retransm_queue_head = msg_seqno(msg); + l_ptr->retransm_queue_size = retransmits; + return; + } else { + /* Don't retransmit if driver already has the buffer */ + } + } else { + /* Detect repeated retransmit failures on uncongested bearer */ + + if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (++l_ptr->stale_count > 100) { + link_retransmit_failure(l_ptr, buf); + return; + } + } else { + l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->stale_count = 1; + } } + while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { - /* Catch if retransmissions fail repeatedly: */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>"); - info("...Retransmitted %u times\n", - l_ptr->stale_count); - link_print(l_ptr, TIPC_CONS, "Resetting Link\n"); - tipc_link_reset(l_ptr); - break; - } - } else { - l_ptr->stale_count = 0; - } - l_ptr->last_retransmitted = msg_seqno(msg); - msg_dbg(buf_msg(buf), ">RETR>"); buf = buf->next; retransmits--; @@ -1650,6 +1741,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, return; } } + l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; } @@ -1720,6 +1812,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) link_recv_non_seq(buf); continue; } + + if (unlikely(!msg_short(msg) && + (msg_destnode(msg) != tipc_own_addr))) + goto cont; + n_ptr = tipc_node_find(msg_prevnode(msg)); if (unlikely(!n_ptr)) goto cont; @@ -2140,7 +2237,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - warn("Changing prio <%s>: %u->%u\n", + warn("Resetting link <%s>, priority change %u->%u\n", l_ptr->name, l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ @@ -2209,17 +2306,22 @@ void tipc_link_tunnel(struct link *l_ptr, u32 length = msg_size(msg); tunnel = l_ptr->owner->active_links[selector & 1]; - if (!tipc_link_is_up(tunnel)) + if (!tipc_link_is_up(tunnel)) { + warn("Link changeover error, " + "tunnel link no longer available\n"); return; + } msg_set_size(tunnel_hdr, length + INT_H_SIZE); buf = buf_acquire(length + INT_H_SIZE); - if (!buf) + if (!buf) { + warn("Link changeover error, " + "unable to send tunnel msg\n"); return; + } memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE); memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length); dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane); msg_dbg(buf_msg(buf), ">SEND>"); - assert(tunnel); tipc_link_send_buf(tunnel, buf); } @@ -2235,23 +2337,27 @@ void tipc_link_changeover(struct link *l_ptr) u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; struct link *tunnel = l_ptr->owner->active_links[0]; - int split_bundles = tipc_node_has_redundant_links(l_ptr->owner); struct tipc_msg tunnel_hdr; + int split_bundles; if (!tunnel) return; - if (!l_ptr->owner->permit_changeover) + if (!l_ptr->owner->permit_changeover) { + warn("Link changeover error, " + "peer did not permit changeover\n"); return; + } msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); + dbg("Link changeover requires %u tunnel messages\n", msgcount); + if (!l_ptr->first_out) { struct sk_buff *buf; - assert(!msgcount); buf = buf_acquire(INT_H_SIZE); if (buf) { memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); @@ -2261,10 +2367,15 @@ void tipc_link_changeover(struct link *l_ptr) msg_dbg(&tunnel_hdr, "EMPTY>SEND>"); tipc_link_send_buf(tunnel, buf); } else { - warn("Memory squeeze; link changeover failed\n"); + warn("Link changeover error, " + "unable to send changeover msg\n"); } return; } + + split_bundles = (l_ptr->owner->active_links[0] != + l_ptr->owner->active_links[1]); + while (crs) { struct tipc_msg *msg = buf_msg(crs); @@ -2310,7 +2421,8 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) msg_set_size(&tunnel_hdr, length + INT_H_SIZE); outbuf = buf_acquire(length + INT_H_SIZE); if (outbuf == NULL) { - warn("Memory squeeze; buffer duplication failed\n"); + warn("Link changeover error, " + "unable to send duplicate msg\n"); return; } memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); @@ -2364,11 +2476,15 @@ static int link_recv_changeover_msg(struct link **l_ptr, u32 msg_count = msg_msgcnt(tunnel_msg); dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; - assert(dest_link != *l_ptr); if (!dest_link) { msg_dbg(tunnel_msg, "NOLINK/<REC<"); goto exit; } + if (dest_link == *l_ptr) { + err("Unexpected changeover message on link <%s>\n", + (*l_ptr)->name); + goto exit; + } dbg("%c<-%c:", dest_link->b_ptr->net_plane, (*l_ptr)->b_ptr->net_plane); *l_ptr = dest_link; @@ -2381,7 +2497,7 @@ static int link_recv_changeover_msg(struct link **l_ptr, } *buf = buf_extract(tunnel_buf,INT_H_SIZE); if (*buf == NULL) { - warn("Memory squeeze; failed to extract msg\n"); + warn("Link changeover error, duplicate msg dropped\n"); goto exit; } msg_dbg(tunnel_msg, "TNL<REC<"); @@ -2393,13 +2509,17 @@ static int link_recv_changeover_msg(struct link **l_ptr, if (tipc_link_is_up(dest_link)) { msg_dbg(tunnel_msg, "UP/FIRST/<REC<"); + info("Resetting link <%s>, changeover initiated by peer\n", + dest_link->name); tipc_link_reset(dest_link); dest_link->exp_msg_count = msg_count; + dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } else if (dest_link->exp_msg_count == START_CHANGEOVER) { msg_dbg(tunnel_msg, "BLK/FIRST/<REC<"); dest_link->exp_msg_count = msg_count; + dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } @@ -2407,6 +2527,8 @@ static int link_recv_changeover_msg(struct link **l_ptr, /* Receive original message */ if (dest_link->exp_msg_count == 0) { + warn("Link switchover error, " + "got too many tunnelled messages\n"); msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<"); dbg_print_link(dest_link, "LINK:"); goto exit; @@ -2422,7 +2544,7 @@ static int link_recv_changeover_msg(struct link **l_ptr, buf_discard(tunnel_buf); return 1; } else { - warn("Memory squeeze; dropped incoming msg\n"); + warn("Link changeover error, original msg dropped\n"); } } exit: @@ -2444,13 +2566,8 @@ void tipc_link_recv_bundle(struct sk_buff *buf) while (msgcount--) { obuf = buf_extract(buf, pos); if (obuf == NULL) { - char addr_string[16]; - - warn("Buffer allocation failure;\n"); - warn(" incoming message(s) from %s lost\n", - addr_string_fill(addr_string, - msg_orignode(buf_msg(buf)))); - return; + warn("Link unable to unbundle message(s)\n"); + break; }; pos += align(msg_size(buf_msg(obuf))); msg_dbg(buf_msg(obuf), " /"); @@ -2508,7 +2625,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) } fragm = buf_acquire(fragm_sz + INT_H_SIZE); if (fragm == NULL) { - warn("Memory squeeze; failed to fragment msg\n"); + warn("Link unable to fragment message\n"); dsz = -ENOMEM; goto exit; } @@ -2623,7 +2740,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, set_fragm_size(pbuf,fragm_sz); set_expected_frags(pbuf,exp_fragm_cnt - 1); } else { - warn("Memory squeeze; got no defragmenting buffer\n"); + warn("Link unable to reassemble fragmented message\n"); } buf_discard(fbuf); return 0; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index a3bbc891f959..f0b063bcc2a9 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -127,7 +127,7 @@ void tipc_named_publish(struct publication *publ) buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); if (!buf) { - warn("Memory squeeze; failed to distribute publication\n"); + warn("Publication distribution failure\n"); return; } @@ -151,7 +151,7 @@ void tipc_named_withdraw(struct publication *publ) buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { - warn("Memory squeeze; failed to distribute withdrawal\n"); + warn("Withdrawl distribution failure\n"); return; } @@ -174,7 +174,6 @@ void tipc_named_node_up(unsigned long node) u32 rest; u32 max_item_buf; - assert(in_own_cluster(node)); read_lock_bh(&tipc_nametbl_lock); max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE; max_item_buf *= ITEM_SIZE; @@ -185,8 +184,8 @@ void tipc_named_node_up(unsigned long node) left = (rest <= max_item_buf) ? rest : max_item_buf; rest -= left; buf = named_prepare_buf(PUBLICATION, left, node); - if (buf == NULL) { - warn("Memory Squeeze; could not send publication\n"); + if (!buf) { + warn("Bulk publication distribution failure\n"); goto exit; } item = (struct distr_item *)msg_data(buf_msg(buf)); @@ -221,15 +220,24 @@ exit: static void node_is_down(struct publication *publ) { struct publication *p; + write_lock_bh(&tipc_nametbl_lock); dbg("node_is_down: withdrawing %u, %u, %u\n", publ->type, publ->lower, publ->upper); publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); - assert(p == publ); write_unlock_bh(&tipc_nametbl_lock); - kfree(publ); + + if (p != publ) { + err("Unable to remove publication from failed node\n" + "(type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->ref, publ->key); + } + + if (p) { + kfree(p); + } } /** @@ -275,9 +283,15 @@ void tipc_named_recv(struct sk_buff *buf) if (publ) { tipc_nodesub_unsubscribe(&publ->subscr); kfree(publ); + } else { + err("Unable to remove publication by node 0x%x\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + msg_orignode(msg), + ntohl(item->type), ntohl(item->lower), + ntohl(item->ref), ntohl(item->key)); } } else { - warn("tipc_named_recv: unknown msg\n"); + warn("Unrecognized name table message received\n"); } item++; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index d129422fc5c2..049242ea5c38 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -71,7 +71,7 @@ struct sub_seq { * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type'; * sub-sequences are sorted in ascending order * @alloc: number of sub-sequences currently in array - * @first_free: upper bound of highest sub-sequence + 1 + * @first_free: array index of first unused sub-sequence entry * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' * @lock: spinlock controlling access to name sequence structure @@ -101,7 +101,7 @@ struct name_table { static struct name_table table = { NULL } ; static atomic_t rsv_publ_ok = ATOMIC_INIT(0); -rwlock_t tipc_nametbl_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) @@ -117,14 +117,12 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 port_ref, u32 key) { - struct publication *publ = - (struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC); + struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); if (publ == NULL) { - warn("Memory squeeze; failed to create publication\n"); + warn("Publication creation failure, no memory\n"); return NULL; } - memset(publ, 0, sizeof(*publ)); publ->type = type; publ->lower = lower; publ->upper = upper; @@ -144,11 +142,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, static struct sub_seq *tipc_subseq_alloc(u32 cnt) { - u32 sz = cnt * sizeof(struct sub_seq); - struct sub_seq *sseq = (struct sub_seq *)kmalloc(sz, GFP_ATOMIC); - - if (sseq) - memset(sseq, 0, sz); + struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); return sseq; } @@ -160,22 +154,20 @@ static struct sub_seq *tipc_subseq_alloc(u32 cnt) static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head) { - struct name_seq *nseq = - (struct name_seq *)kmalloc(sizeof(*nseq), GFP_ATOMIC); + struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC); struct sub_seq *sseq = tipc_subseq_alloc(1); if (!nseq || !sseq) { - warn("Memory squeeze; failed to create name sequence\n"); + warn("Name sequence creation failed, no memory\n"); kfree(nseq); kfree(sseq); return NULL; } - memset(nseq, 0, sizeof(*nseq)); - nseq->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&nseq->lock); nseq->type = type; nseq->sseqs = sseq; - dbg("tipc_nameseq_create() nseq = %x type %u, ssseqs %x, ff: %u\n", + dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n", nseq, type, nseq->sseqs, nseq->first_free); nseq->alloc = 1; INIT_HLIST_NODE(&nseq->ns_list); @@ -253,16 +245,16 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, struct sub_seq *sseq; int created_subseq = 0; - assert(nseq->first_free <= nseq->alloc); sseq = nameseq_find_subseq(nseq, lower); - dbg("nameseq_ins: for seq %x,<%u,%u>, found sseq %x\n", + dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n", nseq, type, lower, sseq); if (sseq) { /* Lower end overlaps existing entry => need an exact match */ if ((sseq->lower != lower) || (sseq->upper != upper)) { - warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); + warn("Cannot publish {%u,%u,%u}, overlap error\n", + type, lower, upper); return NULL; } } else { @@ -277,25 +269,27 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, if ((inspos < nseq->first_free) && (upper >= nseq->sseqs[inspos].lower)) { - warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); + warn("Cannot publish {%u,%u,%u}, overlap error\n", + type, lower, upper); return NULL; } /* Ensure there is space for new sub-sequence */ if (nseq->first_free == nseq->alloc) { - struct sub_seq *sseqs = nseq->sseqs; - nseq->sseqs = tipc_subseq_alloc(nseq->alloc * 2); - if (nseq->sseqs != NULL) { - memcpy(nseq->sseqs, sseqs, - nseq->alloc * sizeof (struct sub_seq)); - kfree(sseqs); - dbg("Allocated %u sseqs\n", nseq->alloc); - nseq->alloc *= 2; - } else { - warn("Memory squeeze; failed to create sub-sequence\n"); + struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2); + + if (!sseqs) { + warn("Cannot publish {%u,%u,%u}, no memory\n", + type, lower, upper); return NULL; } + dbg("Allocated %u more sseqs\n", nseq->alloc); + memcpy(sseqs, nseq->sseqs, + nseq->alloc * sizeof(struct sub_seq)); + kfree(nseq->sseqs); + nseq->sseqs = sseqs; + nseq->alloc *= 2; } dbg("Have %u sseqs for type %u\n", nseq->alloc, type); @@ -311,7 +305,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, sseq->upper = upper; created_subseq = 1; } - dbg("inserting (%u %u %u) from %x:%u into sseq %x(%u,%u) of seq %x\n", + dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n", type, lower, upper, node, port, sseq, sseq->lower, sseq->upper, nseq); @@ -320,7 +314,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, publ = publ_create(type, lower, upper, scope, node, port, key); if (!publ) return NULL; - dbg("inserting publ %x, node=%x publ->node=%x, subscr->node=%x\n", + dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n", publ, node, publ->node, publ->subscr.node); if (!sseq->zone_list) @@ -367,45 +361,47 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, /** * tipc_nameseq_remove_publ - + * + * NOTE: There may be cases where TIPC is asked to remove a publication + * that is not in the name table. For example, if another node issues a + * publication for a name sequence that overlaps an existing name sequence + * the publication will not be recorded, which means the publication won't + * be found when the name sequence is later withdrawn by that node. + * A failed withdraw request simply returns a failure indication and lets the + * caller issue any error or warning messages associated with such a problem. */ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, u32 node, u32 ref, u32 key) { struct publication *publ; + struct publication *curr; struct publication *prev; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); struct sub_seq *free; struct subscription *s, *st; int removed_subseq = 0; - assert(nseq); - - if (!sseq) { - int i; - - warn("Withdraw unknown <%u,%u>?\n", nseq->type, inst); - assert(nseq->sseqs); - dbg("Dumping subseqs %x for %x, alloc = %u,ff=%u\n", - nseq->sseqs, nseq, nseq->alloc, - nseq->first_free); - for (i = 0; i < nseq->first_free; i++) { - dbg("Subseq %u(%x): lower = %u,upper = %u\n", - i, &nseq->sseqs[i], nseq->sseqs[i].lower, - nseq->sseqs[i].upper); - } + if (!sseq) return NULL; - } - dbg("nameseq_remove: seq: %x, sseq %x, <%u,%u> key %u\n", + + dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n", nseq, sseq, nseq->type, inst, key); + /* Remove publication from zone scope list */ + prev = sseq->zone_list; publ = sseq->zone_list->zone_list_next; while ((publ->key != key) || (publ->ref != ref) || (publ->node && (publ->node != node))) { prev = publ; publ = publ->zone_list_next; - assert(prev != sseq->zone_list); + if (prev == sseq->zone_list) { + + /* Prevent endless loop if publication not found */ + + return NULL; + } } if (publ != sseq->zone_list) prev->zone_list_next = publ->zone_list_next; @@ -416,14 +412,24 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->zone_list = NULL; } + /* Remove publication from cluster scope list, if present */ + if (in_own_cluster(node)) { prev = sseq->cluster_list; - publ = sseq->cluster_list->cluster_list_next; - while ((publ->key != key) || (publ->ref != ref) || - (publ->node && (publ->node != node))) { - prev = publ; - publ = publ->cluster_list_next; - assert(prev != sseq->cluster_list); + curr = sseq->cluster_list->cluster_list_next; + while (curr != publ) { + prev = curr; + curr = curr->cluster_list_next; + if (prev == sseq->cluster_list) { + + /* Prevent endless loop for malformed list */ + + err("Unable to de-list cluster publication\n" + "{%u%u}, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, + publ->ref, publ->key); + goto end_cluster; + } } if (publ != sseq->cluster_list) prev->cluster_list_next = publ->cluster_list_next; @@ -434,15 +440,26 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->cluster_list = NULL; } } +end_cluster: + + /* Remove publication from node scope list, if present */ if (node == tipc_own_addr) { prev = sseq->node_list; - publ = sseq->node_list->node_list_next; - while ((publ->key != key) || (publ->ref != ref) || - (publ->node && (publ->node != node))) { - prev = publ; - publ = publ->node_list_next; - assert(prev != sseq->node_list); + curr = sseq->node_list->node_list_next; + while (curr != publ) { + prev = curr; + curr = curr->node_list_next; + if (prev == sseq->node_list) { + + /* Prevent endless loop for malformed list */ + + err("Unable to de-list node publication\n" + "{%u%u}, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, + publ->ref, publ->key); + goto end_node; + } } if (publ != sseq->node_list) prev->node_list_next = publ->node_list_next; @@ -453,22 +470,18 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->node_list = NULL; } } - assert(!publ->node || (publ->node == node)); - assert(publ->ref == ref); - assert(publ->key == key); +end_node: - /* - * Contract subseq list if no more publications: - */ - if (!sseq->node_list && !sseq->cluster_list && !sseq->zone_list) { + /* Contract subseq list if no more publications for that subseq */ + + if (!sseq->zone_list) { free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq)); removed_subseq = 1; } - /* - * Any subscriptions waiting ? - */ + /* Notify any waiting subscriptions */ + list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscr_report_overlap(s, publ->lower, @@ -478,6 +491,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i publ->node, removed_subseq); } + return publ; } @@ -530,7 +544,7 @@ static struct name_seq *nametbl_find_seq(u32 type) seq_head = &table.types[hash(type)]; hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { if (ns->type == type) { - dbg("found %x\n", ns); + dbg("found %p\n", ns); return ns; } } @@ -543,22 +557,21 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, { struct name_seq *seq = nametbl_find_seq(type); - dbg("ins_publ: <%u,%x,%x> found %x\n", type, lower, upper, seq); + dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq); if (lower > upper) { - warn("Failed to publish illegal <%u,%u,%u>\n", + warn("Failed to publish illegal {%u,%u,%u}\n", type, lower, upper); return NULL; } - dbg("Publishing <%u,%u,%u> from %x\n", type, lower, upper, node); + dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node); if (!seq) { seq = tipc_nameseq_create(type, &table.types[hash(type)]); - dbg("tipc_nametbl_insert_publ: created %x\n", seq); + dbg("tipc_nametbl_insert_publ: created %p\n", seq); } if (!seq) return NULL; - assert(seq->type == type); return tipc_nameseq_insert_publ(seq, type, lower, upper, scope, node, port, key); } @@ -572,7 +585,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, if (!seq) return NULL; - dbg("Withdrawing <%u,%u> from %x\n", type, lower, node); + dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node); publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { @@ -738,12 +751,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, struct publication *publ; if (table.local_publ_count >= tipc_max_publications) { - warn("Failed publish: max %u local publication\n", + warn("Publication failed, local publication limit reached (%u)\n", tipc_max_publications); return NULL; } if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) { - warn("Failed to publish reserved name <%u,%u,%u>\n", + warn("Publication failed, reserved name {%u,%u,%u}\n", type, lower, upper); return NULL; } @@ -767,10 +780,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; - dbg("tipc_nametbl_withdraw:<%d,%d,%d>\n", type, lower, key); + dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key); write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); - if (publ) { + if (likely(publ)) { table.local_publ_count--; if (publ->scope != TIPC_NODE_SCOPE) tipc_named_withdraw(publ); @@ -780,6 +793,9 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) return 1; } write_unlock_bh(&tipc_nametbl_lock); + err("Unable to remove local publication\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + type, lower, ref, key); return 0; } @@ -787,8 +803,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) * tipc_nametbl_subscribe - add a subscription object to the name table */ -void -tipc_nametbl_subscribe(struct subscription *s) +void tipc_nametbl_subscribe(struct subscription *s) { u32 type = s->seq.type; struct name_seq *seq; @@ -800,11 +815,13 @@ tipc_nametbl_subscribe(struct subscription *s) } if (seq){ spin_lock_bh(&seq->lock); - dbg("tipc_nametbl_subscribe:found %x for <%u,%u,%u>\n", + dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n", seq, type, s->seq.lower, s->seq.upper); - assert(seq->type == type); tipc_nameseq_subscribe(seq, s); spin_unlock_bh(&seq->lock); + } else { + warn("Failed to create subscription for {%u,%u,%u}\n", + s->seq.type, s->seq.lower, s->seq.upper); } write_unlock_bh(&tipc_nametbl_lock); } @@ -813,8 +830,7 @@ tipc_nametbl_subscribe(struct subscription *s) * tipc_nametbl_unsubscribe - remove a subscription object from name table */ -void -tipc_nametbl_unsubscribe(struct subscription *s) +void tipc_nametbl_unsubscribe(struct subscription *s) { struct name_seq *seq; @@ -1036,7 +1052,7 @@ int tipc_nametbl_init(void) { int array_size = sizeof(struct hlist_head) * tipc_nametbl_size; - table.types = (struct hlist_head *)kmalloc(array_size, GFP_ATOMIC); + table.types = kmalloc(array_size, GFP_ATOMIC); if (!table.types) return -ENOMEM; @@ -1049,35 +1065,20 @@ int tipc_nametbl_init(void) void tipc_nametbl_stop(void) { - struct hlist_head *seq_head; - struct hlist_node *seq_node; - struct hlist_node *tmp; - struct name_seq *seq; u32 i; if (!table.types) return; + /* Verify name table is empty, then release it */ + write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < tipc_nametbl_size; i++) { - seq_head = &table.types[i]; - hlist_for_each_entry_safe(seq, seq_node, tmp, seq_head, ns_list) { - struct sub_seq *sseq = seq->sseqs; - - for (; sseq != &seq->sseqs[seq->first_free]; sseq++) { - struct publication *publ = sseq->zone_list; - assert(publ); - do { - struct publication *next = - publ->zone_list_next; - kfree(publ); - publ = next; - } - while (publ != sseq->zone_list); - } - } + if (!hlist_empty(&table.types[i])) + err("tipc_nametbl_stop(): hash chain %u is non-null\n", i); } kfree(table.types); table.types = NULL; write_unlock_bh(&tipc_nametbl_lock); } + diff --git a/net/tipc/net.c b/net/tipc/net.c index f7c8223ddf7d..a991bf8a7f74 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -115,7 +115,7 @@ * - A local spin_lock protecting the queue of subscriber events. */ -rwlock_t tipc_net_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(tipc_net_lock); struct network tipc_net = { NULL }; struct node *tipc_net_select_remote_node(u32 addr, u32 ref) @@ -160,14 +160,11 @@ void tipc_net_send_external_routes(u32 dest) static int net_init(void) { - u32 sz = sizeof(struct _zone *) * (tipc_max_zones + 1); - memset(&tipc_net, 0, sizeof(tipc_net)); - tipc_net.zones = (struct _zone **)kmalloc(sz, GFP_ATOMIC); + tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); if (!tipc_net.zones) { return -ENOMEM; } - memset(tipc_net.zones, 0, sz); return TIPC_OK; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 0d5db06e203f..fc6d09630ccd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2,7 +2,7 @@ * net/tipc/node.c: TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,34 +61,37 @@ struct node *tipc_node_create(u32 addr) struct node **curr_node; n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC); - if (n_ptr != NULL) { - memset(n_ptr, 0, sizeof(*n_ptr)); - n_ptr->addr = addr; - n_ptr->lock = SPIN_LOCK_UNLOCKED; - INIT_LIST_HEAD(&n_ptr->nsub); - - c_ptr = tipc_cltr_find(addr); - if (c_ptr == NULL) - c_ptr = tipc_cltr_create(addr); - if (c_ptr != NULL) { - n_ptr->owner = c_ptr; - tipc_cltr_attach_node(c_ptr, n_ptr); - n_ptr->last_router = -1; - - /* Insert node into ordered list */ - for (curr_node = &tipc_nodes; *curr_node; - curr_node = &(*curr_node)->next) { - if (addr < (*curr_node)->addr) { - n_ptr->next = *curr_node; - break; - } - } - (*curr_node) = n_ptr; - } else { - kfree(n_ptr); - n_ptr = NULL; - } - } + if (!n_ptr) { + warn("Node creation failed, no memory\n"); + return NULL; + } + + c_ptr = tipc_cltr_find(addr); + if (!c_ptr) { + c_ptr = tipc_cltr_create(addr); + } + if (!c_ptr) { + kfree(n_ptr); + return NULL; + } + + memset(n_ptr, 0, sizeof(*n_ptr)); + n_ptr->addr = addr; + spin_lock_init(&n_ptr->lock); + INIT_LIST_HEAD(&n_ptr->nsub); + n_ptr->owner = c_ptr; + tipc_cltr_attach_node(c_ptr, n_ptr); + n_ptr->last_router = -1; + + /* Insert node into ordered list */ + for (curr_node = &tipc_nodes; *curr_node; + curr_node = &(*curr_node)->next) { + if (addr < (*curr_node)->addr) { + n_ptr->next = *curr_node; + break; + } + } + (*curr_node) = n_ptr; return n_ptr; } @@ -122,6 +125,8 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) { struct link **active = &n_ptr->active_links[0]; + n_ptr->working_links++; + info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->b_ptr->net_plane); @@ -132,7 +137,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) return; } if (l_ptr->priority < active[0]->priority) { - info("Link is standby\n"); + info("New link <%s> becomes standby\n", l_ptr->name); return; } tipc_link_send_duplicate(active[0], l_ptr); @@ -140,8 +145,9 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) active[0] = l_ptr; return; } - info("Link <%s> on network plane %c becomes standby\n", - active[0]->name, active[0]->b_ptr->net_plane); + info("Old link <%s> becomes standby\n", active[0]->name); + if (active[1] != active[0]) + info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; } @@ -181,6 +187,8 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr) { struct link **active; + n_ptr->working_links--; + if (!tipc_link_is_active(l_ptr)) { info("Lost standby link <%s> on network plane %c\n", l_ptr->name, l_ptr->b_ptr->net_plane); @@ -210,8 +218,7 @@ int tipc_node_has_active_links(struct node *n_ptr) int tipc_node_has_redundant_links(struct node *n_ptr) { - return (tipc_node_has_active_links(n_ptr) && - (n_ptr->active_links[0] != n_ptr->active_links[1])); + return (n_ptr->working_links > 1); } static int tipc_node_has_active_routes(struct node *n_ptr) @@ -234,7 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr) u32 bearer_id = l_ptr->b_ptr->identity; char addr_string[16]; - assert(bearer_id < MAX_BEARERS); if (n_ptr->link_cnt >= 2) { char addr_string[16]; @@ -249,7 +255,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr) n_ptr->link_cnt++; return n_ptr; } - err("Attempt to establish second link on <%s> to <%s> \n", + err("Attempt to establish second link on <%s> to %s \n", l_ptr->b_ptr->publ.name, addr_string_fill(addr_string, l_ptr->addr)); } @@ -314,7 +320,7 @@ static void node_established_contact(struct node *n_ptr) struct cluster *c_ptr; dbg("node_established_contact:-> %x\n", n_ptr->addr); - if (!tipc_node_has_active_routes(n_ptr)) { + if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) { tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); } @@ -586,6 +592,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct sk_buff *buf; struct node *n_ptr; struct tipc_node_info node_info; + u32 payload_size; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -602,8 +609,11 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* For now, get space for all other nodes (will need to modify this when slave nodes are supported */ - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(node_info)) * - (tipc_max_nodes - 1)); + payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1); + if (payload_size > 32768u) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (too many nodes)"); + buf = tipc_cfg_reply_alloc(payload_size); if (!buf) return NULL; @@ -627,6 +637,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct sk_buff *buf; struct node *n_ptr; struct tipc_link_info link_info; + u32 payload_size; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -639,12 +650,15 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!tipc_nodes) return tipc_cfg_reply_none(); - - /* For now, get space for 2 links to all other nodes + bcast link - (will need to modify this when slave nodes are supported */ - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(link_info)) * - (2 * (tipc_max_nodes - 1) + 1)); + + /* Get space for all unicast links + multicast link */ + + payload_size = TLV_SPACE(sizeof(link_info)) * + (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1); + if (payload_size > 32768u) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (too many links)"); + buf = tipc_cfg_reply_alloc(payload_size); if (!buf) return NULL; diff --git a/net/tipc/node.h b/net/tipc/node.h index 781126e084ae..a07cc79ea637 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,6 +51,7 @@ * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node + * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @permit_changeover: non-zero if node has redundant links to this system * @routers: bitmap (used for multicluster communication) @@ -76,6 +77,7 @@ struct node { struct link *active_links[2]; struct link *links[MAX_BEARERS]; int link_cnt; + int working_links; int permit_changeover; u32 routers[512/32]; int last_router; diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index cff4068cc755..cc3fff3dec4f 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -47,18 +47,19 @@ void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down) { - node_sub->node = NULL; - if (addr == tipc_own_addr) + if (addr == tipc_own_addr) { + node_sub->node = NULL; return; - if (!tipc_addr_node_valid(addr)) { - warn("node_subscr with illegal %x\n", addr); + } + + node_sub->node = tipc_node_find(addr); + if (!node_sub->node) { + warn("Node subscription rejected, unknown node 0x%x\n", addr); return; } - node_sub->handle_node_down = handle_down; node_sub->usr_handle = usr_handle; - node_sub->node = tipc_node_find(addr); - assert(node_sub->node); + tipc_node_lock(node_sub->node); list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub); tipc_node_unlock(node_sub->node); diff --git a/net/tipc/port.c b/net/tipc/port.c index 67e96cb1e825..b9c8c6b9e94f 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -57,8 +57,8 @@ static struct sk_buff *msg_queue_head = NULL; static struct sk_buff *msg_queue_tail = NULL; -spinlock_t tipc_port_list_lock = SPIN_LOCK_UNLOCKED; -static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(tipc_port_list_lock); +static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -168,7 +168,6 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) struct port_list *item = dp; int cnt = 0; - assert(buf); msg = buf_msg(buf); /* Create destination port list, if one wasn't supplied */ @@ -196,7 +195,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); if (b == NULL) { - warn("Buffer allocation failure\n"); + warn("Unable to deliver multicast message(s)\n"); msg_dbg(msg, "LOST:"); goto exit; } @@ -227,15 +226,14 @@ u32 tipc_createport_raw(void *usr_handle, struct tipc_msg *msg; u32 ref; - p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC); - if (p_ptr == NULL) { - warn("Memory squeeze; failed to create port\n"); + p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC); + if (!p_ptr) { + warn("Port creation failed, no memory\n"); return 0; } - memset(p_ptr, 0, sizeof(*p_ptr)); ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); if (!ref) { - warn("Reference Table Exhausted\n"); + warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); return 0; } @@ -810,18 +808,20 @@ static void port_dispatcher_sigh(void *dummy) void *usr_handle; int connected; int published; + u32 message_type; struct sk_buff *next = buf->next; struct tipc_msg *msg = buf_msg(buf); u32 dref = msg_destport(msg); + message_type = msg_type(msg); + if (message_type > TIPC_DIRECT_MSG) + goto reject; /* Unsupported message type */ + p_ptr = tipc_port_lock(dref); - if (!p_ptr) { - /* Port deleted while msg in queue */ - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - continue; - } + if (!p_ptr) + goto reject; /* Port deleted while msg in queue */ + orig.ref = msg_origport(msg); orig.node = msg_orignode(msg); up_ptr = p_ptr->user_port; @@ -832,7 +832,7 @@ static void port_dispatcher_sigh(void *dummy) if (unlikely(msg_errcode(msg))) goto err; - switch (msg_type(msg)) { + switch (message_type) { case TIPC_CONN_MSG:{ tipc_conn_msg_event cb = up_ptr->conn_msg_cb; @@ -874,6 +874,7 @@ static void port_dispatcher_sigh(void *dummy) &orig); break; } + case TIPC_MCAST_MSG: case TIPC_NAMED_MSG:{ tipc_named_msg_event cb = up_ptr->named_msg_cb; @@ -886,7 +887,8 @@ static void port_dispatcher_sigh(void *dummy) goto reject; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); - dseq.upper = dseq.lower; + dseq.upper = (message_type == TIPC_NAMED_MSG) + ? dseq.lower : msg_nameupper(msg); skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), msg_data_sz(msg), msg_importance(msg), @@ -899,7 +901,7 @@ static void port_dispatcher_sigh(void *dummy) buf = next; continue; err: - switch (msg_type(msg)) { + switch (message_type) { case TIPC_CONN_MSG:{ tipc_conn_shutdown_event cb = @@ -931,6 +933,7 @@ err: msg_data_sz(msg), msg_errcode(msg), &orig); break; } + case TIPC_MCAST_MSG: case TIPC_NAMED_MSG:{ tipc_named_msg_err_event cb = up_ptr->named_err_cb; @@ -940,7 +943,8 @@ err: break; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); - dseq.upper = dseq.lower; + dseq.upper = (message_type == TIPC_NAMED_MSG) + ? dseq.lower : msg_nameupper(msg); skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), msg_data_sz(msg), msg_errcode(msg), &dseq); @@ -1053,8 +1057,9 @@ int tipc_createport(u32 user_ref, struct port *p_ptr; u32 ref; - up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (up_ptr == NULL) { + up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); + if (!up_ptr) { + warn("Port creation failed, no memory\n"); return -ENOMEM; } ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); @@ -1165,8 +1170,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (!p_ptr->publ.published) - goto exit; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -1193,7 +1196,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->publ.published = 0; -exit: tipc_port_unlock(p_ptr); return res; } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 33bbf5095094..e6d6ae22ea49 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -63,7 +63,7 @@ struct ref_table tipc_ref_table = { NULL }; -static rwlock_t ref_table_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(ref_table_lock); /** * tipc_ref_table_init - create reference table for objects @@ -79,7 +79,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) while (sz < requested_size) { sz <<= 1; } - table = (struct reference *)vmalloc(sz * sizeof(struct reference)); + table = vmalloc(sz * sizeof(*table)); if (table == NULL) return -ENOMEM; @@ -87,7 +87,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) index_mask = sz - 1; for (i = sz - 1; i >= 0; i--) { table[i].object = NULL; - table[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&table[i].lock); table[i].data.next_plus_upper = (start & ~index_mask) + i - 1; } tipc_ref_table.entries = table; @@ -127,7 +127,14 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) u32 next_plus_upper; u32 reference = 0; - assert(tipc_ref_table.entries && object); + if (!object) { + err("Attempt to acquire reference to non-existent object\n"); + return 0; + } + if (!tipc_ref_table.entries) { + err("Reference table not found during acquisition attempt\n"); + return 0; + } write_lock_bh(&ref_table_lock); if (tipc_ref_table.first_free) { @@ -162,15 +169,28 @@ void tipc_ref_discard(u32 ref) u32 index; u32 index_mask; - assert(tipc_ref_table.entries); - assert(ref != 0); + if (!ref) { + err("Attempt to discard reference 0\n"); + return; + } + if (!tipc_ref_table.entries) { + err("Reference table not found during discard attempt\n"); + return; + } write_lock_bh(&ref_table_lock); index_mask = tipc_ref_table.index_mask; index = ref & index_mask; entry = &(tipc_ref_table.entries[index]); - assert(entry->object != 0); - assert(entry->data.reference == ref); + + if (!entry->object) { + err("Attempt to discard reference to non-existent object\n"); + goto exit; + } + if (entry->data.reference != ref) { + err("Attempt to discard non-existent reference\n"); + goto exit; + } /* mark entry as unused */ entry->object = NULL; @@ -184,6 +204,7 @@ void tipc_ref_discard(u32 ref) /* increment upper bits of entry to invalidate subsequent references */ entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1); +exit: write_unlock_bh(&ref_table_lock); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 648a734e6044..32d778448a00 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -169,12 +169,6 @@ static int tipc_create(struct socket *sock, int protocol) struct sock *sk; u32 ref; - if ((sock->type != SOCK_STREAM) && - (sock->type != SOCK_SEQPACKET) && - (sock->type != SOCK_DGRAM) && - (sock->type != SOCK_RDM)) - return -EPROTOTYPE; - if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -199,6 +193,9 @@ static int tipc_create(struct socket *sock, int protocol) sock->ops = &msg_ops; sock->state = SS_READY; break; + default: + tipc_deleteport(ref); + return -EPROTOTYPE; } sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); @@ -426,7 +423,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) return -EFAULT; - if ((ntohs(hdr.tcm_type) & 0xC000) & (!capable(CAP_NET_ADMIN))) + if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) return -EACCES; return 0; @@ -437,7 +434,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) * @iocb: (unused) * @sock: socket structure * @m: message to send - * @total_len: (unused) + * @total_len: length of message * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -458,7 +455,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely(!dest)) return -EDESTADDRREQ; - if (unlikely(dest->family != AF_TIPC)) + if (unlikely((m->msg_namelen < sizeof(*dest)) || + (dest->family != AF_TIPC))) return -EINVAL; needs_conn = (sock->state != SS_READY); @@ -470,6 +468,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if ((tsock->p->published) || ((sock->type == SOCK_STREAM) && (total_len != 0))) return -EOPNOTSUPP; + if (dest->addrtype == TIPC_ADDR_NAME) { + tsock->p->conn_type = dest->addr.name.name.type; + tsock->p->conn_instance = dest->addr.name.name.instance; + } } if (down_interruptible(&tsock->sem)) @@ -538,7 +540,7 @@ exit: * @iocb: (unused) * @sock: socket structure * @m: message to send - * @total_len: (unused) + * @total_len: length of message * * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. * @@ -561,15 +563,15 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, return -ERESTARTSYS; } - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) - res = -EPIPE; - else - res = -ENOTCONN; - goto exit; - } - do { + if (unlikely(sock->state != SS_CONNECTED)) { + if (sock->state == SS_DISCONNECTING) + res = -EPIPE; + else + res = -ENOTCONN; + goto exit; + } + res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov); if (likely(res != -ELINKCONG)) { exit: @@ -597,7 +599,8 @@ exit: * * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ @@ -611,6 +614,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, char __user *curr_start; int curr_left; int bytes_to_send; + int bytes_sent; int res; if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) @@ -633,11 +637,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, * of small iovec entries into send_packet(). */ - my_msg = *m; - curr_iov = my_msg.msg_iov; - curr_iovlen = my_msg.msg_iovlen; + curr_iov = m->msg_iov; + curr_iovlen = m->msg_iovlen; my_msg.msg_iov = &my_iov; my_msg.msg_iovlen = 1; + bytes_sent = 0; while (curr_iovlen--) { curr_start = curr_iov->iov_base; @@ -648,16 +652,18 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, ? curr_left : TIPC_MAX_USER_MSG_SIZE; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; - if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) - return res; + if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { + return bytes_sent ? bytes_sent : res; + } curr_left -= bytes_to_send; curr_start += bytes_to_send; + bytes_sent += bytes_to_send; } curr_iov++; } - return total_len; + return bytes_sent; } /** @@ -727,6 +733,7 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, u32 anc_data[3]; u32 err; u32 dest_type; + int has_name; int res; if (likely(m->msg_controllen == 0)) @@ -738,10 +745,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, if (unlikely(err)) { anc_data[0] = err; anc_data[1] = msg_data_sz(msg); - if ((res = put_cmsg(m, SOL_SOCKET, TIPC_ERRINFO, 8, anc_data))) + if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data))) return res; if (anc_data[1] && - (res = put_cmsg(m, SOL_SOCKET, TIPC_RETDATA, anc_data[1], + (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], msg_data(msg)))) return res; } @@ -751,25 +758,28 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG; switch (dest_type) { case TIPC_NAMED_MSG: + has_name = 1; anc_data[0] = msg_nametype(msg); anc_data[1] = msg_namelower(msg); anc_data[2] = msg_namelower(msg); break; case TIPC_MCAST_MSG: + has_name = 1; anc_data[0] = msg_nametype(msg); anc_data[1] = msg_namelower(msg); anc_data[2] = msg_nameupper(msg); break; case TIPC_CONN_MSG: + has_name = (tport->conn_type != 0); anc_data[0] = tport->conn_type; anc_data[1] = tport->conn_instance; anc_data[2] = tport->conn_instance; break; default: - anc_data[0] = 0; + has_name = 0; } - if (anc_data[0] && - (res = put_cmsg(m, SOL_SOCKET, TIPC_DESTNAME, 12, anc_data))) + if (has_name && + (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data))) return res; return 0; @@ -960,7 +970,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, restart: if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) && (flags & MSG_DONTWAIT))) { - res = (sz_copied == 0) ? -EWOULDBLOCK : 0; + res = -EWOULDBLOCK; goto exit; } @@ -1051,7 +1061,7 @@ restart: exit: up(&tsock->sem); - return res ? res : sz_copied; + return sz_copied ? sz_copied : res; } /** @@ -1236,7 +1246,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (sock->state == SS_READY) return -EOPNOTSUPP; - /* MOVE THE REST OF THIS ERROR CHECKING TO send_msg()? */ + /* Issue Posix-compliant error code if socket is in the wrong state */ + if (sock->state == SS_LISTENING) return -EOPNOTSUPP; if (sock->state == SS_CONNECTING) @@ -1244,13 +1255,20 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (sock->state != SS_UNCONNECTED) return -EISCONN; - if ((dst->family != AF_TIPC) || - ((dst->addrtype != TIPC_ADDR_NAME) && (dst->addrtype != TIPC_ADDR_ID))) + /* + * Reject connection attempt using multicast address + * + * Note: send_msg() validates the rest of the address fields, + * so there's no need to do it here + */ + + if (dst->addrtype == TIPC_ADDR_MCAST) return -EINVAL; /* Send a 'SYN-' to destination */ m.msg_name = dest; + m.msg_namelen = destlen; if ((res = send_msg(NULL, sock, &m, 0)) < 0) { sock->state = SS_DISCONNECTING; return res; @@ -1269,10 +1287,6 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, msg = buf_msg(buf); res = auto_connect(sock, tsock, msg); if (!res) { - if (dst->addrtype == TIPC_ADDR_NAME) { - tsock->p->conn_type = dst->addr.name.name.type; - tsock->p->conn_instance = dst->addr.name.name.instance; - } if (!msg_data_sz(msg)) advance_queue(tsock); } @@ -1386,7 +1400,7 @@ exit: /** * shutdown - shutdown socket connection * @sock: socket structure - * @how: direction to close (always treated as read + write) + * @how: direction to close (unused; always treated as read + write) * * Terminates connection (if necessary), then purges socket's receive queue. * @@ -1469,7 +1483,8 @@ restart: * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol) +static int setsockopt(struct socket *sock, + int lvl, int opt, char __user *ov, int ol) { struct tipc_sock *tsock = tipc_sk(sock->sk); u32 value; @@ -1525,7 +1540,8 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol) * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, int lvl, int opt, char *ov, int *ol) +static int getsockopt(struct socket *sock, + int lvl, int opt, char __user *ov, int *ol) { struct tipc_sock *tsock = tipc_sk(sock->sk); int len; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index c5f026c7fd38..c51600ba5f4a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -266,7 +266,8 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Refuse subscription if global limit exceeded */ if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { - warn("Failed: max %u subscriptions\n", tipc_max_subscriptions); + warn("Subscription rejected, subscription limit reached (%u)\n", + tipc_max_subscriptions); subscr_terminate(subscriber); return; } @@ -274,8 +275,8 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); - if (sub == NULL) { - warn("Memory squeeze; ignoring subscription\n"); + if (!sub) { + warn("Subscription rejected, no memory\n"); subscr_terminate(subscriber); return; } @@ -298,8 +299,7 @@ static void subscr_subscribe(struct tipc_subscr *s, if ((((sub->filter != TIPC_SUB_PORTS) && (sub->filter != TIPC_SUB_SERVICE))) || (sub->seq.lower > sub->seq.upper)) { - warn("Rejecting illegal subscription %u,%u,%u\n", - sub->seq.type, sub->seq.lower, sub->seq.upper); + warn("Subscription rejected, illegal request\n"); kfree(sub); subscr_terminate(subscriber); return; @@ -387,23 +387,22 @@ static void subscr_named_msg_event(void *usr_handle, dbg("subscr_named_msg_event: orig = %x own = %x,\n", orig->node, tipc_own_addr); if (size && (size != sizeof(struct tipc_subscr))) { - warn("Received tipc_subscr of invalid size\n"); + warn("Subscriber rejected, invalid subscription size\n"); return; } /* Create subscriber object */ - subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC); + subscriber = kzalloc(sizeof(struct subscriber), GFP_ATOMIC); if (subscriber == NULL) { - warn("Memory squeeze; ignoring subscriber setup\n"); + warn("Subscriber rejected, no memory\n"); return; } - memset(subscriber, 0, sizeof(struct subscriber)); INIT_LIST_HEAD(&subscriber->subscription_list); INIT_LIST_HEAD(&subscriber->subscriber_list); subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock); if (subscriber->ref == 0) { - warn("Failed to acquire subscriber reference\n"); + warn("Subscriber rejected, reference table exhausted\n"); kfree(subscriber); return; } @@ -422,7 +421,7 @@ static void subscr_named_msg_event(void *usr_handle, NULL, &subscriber->port_ref); if (subscriber->port_ref == 0) { - warn("Memory squeeze; failed to create subscription port\n"); + warn("Subscriber rejected, unable to create port\n"); tipc_ref_discard(subscriber->ref); kfree(subscriber); return; @@ -457,7 +456,7 @@ int tipc_subscr_start(void) int res = -1; memset(&topsrv, 0, sizeof (topsrv)); - topsrv.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); spin_lock_bh(&topsrv.lock); diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c index 3f3f933976e9..04d1b9be9c51 100644 --- a/net/tipc/user_reg.c +++ b/net/tipc/user_reg.c @@ -67,7 +67,7 @@ struct tipc_user { static struct tipc_user *users = NULL; static u32 next_free_user = MAX_USERID + 1; -static spinlock_t reg_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(reg_lock); /** * reg_init - create TIPC user registry (but don't activate it) @@ -82,9 +82,8 @@ static int reg_init(void) spin_lock_bh(®_lock); if (!users) { - users = (struct tipc_user *)kmalloc(USER_LIST_SIZE, GFP_ATOMIC); + users = kzalloc(USER_LIST_SIZE, GFP_ATOMIC); if (users) { - memset(users, 0, USER_LIST_SIZE); for (i = 1; i <= MAX_USERID; i++) { users[i].next = i - 1; } diff --git a/net/tipc/zone.c b/net/tipc/zone.c index 2803e1b4f170..f5b00ea2d5ac 100644 --- a/net/tipc/zone.c +++ b/net/tipc/zone.c @@ -44,19 +44,23 @@ struct _zone *tipc_zone_create(u32 addr) { - struct _zone *z_ptr = NULL; + struct _zone *z_ptr; u32 z_num; - if (!tipc_addr_domain_valid(addr)) + if (!tipc_addr_domain_valid(addr)) { + err("Zone creation failed, invalid domain 0x%x\n", addr); return NULL; + } - z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC); - if (z_ptr != NULL) { - memset(z_ptr, 0, sizeof(*z_ptr)); - z_num = tipc_zone(addr); - z_ptr->addr = tipc_addr(z_num, 0, 0); - tipc_net.zones[z_num] = z_ptr; + z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC); + if (!z_ptr) { + warn("Zone creation failed, insufficient memory\n"); + return NULL; } + + z_num = tipc_zone(addr); + z_ptr->addr = tipc_addr(z_num, 0, 0); + tipc_net.zones[z_num] = z_ptr; return z_ptr; } diff --git a/net/tipc/zone.h b/net/tipc/zone.h index 267999c5a240..5ab3d08602e2 100644 --- a/net/tipc/zone.h +++ b/net/tipc/zone.h @@ -2,7 +2,7 @@ * net/tipc/zone.h: Include file for TIPC zone management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ * struct _zone - TIPC zone structure * @addr: network address of zone * @clusters: array of pointers to all clusters within zone - * @links: (used for inter-zone communication) + * @links: number of (unicast) links to zone */ struct _zone { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d901465ce013..b43a27828df5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -83,7 +83,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/sched.h> @@ -118,7 +117,7 @@ #include <net/checksum.h> #include <linux/security.h> -int sysctl_unix_max_dgram_qlen = 10; +int sysctl_unix_max_dgram_qlen __read_mostly = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); @@ -128,6 +127,24 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +#ifdef CONFIG_SECURITY_NETWORK +static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ + memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); +} + +static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->secid = *UNIXSID(skb); +} +#else +static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ } + +static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ } +#endif /* CONFIG_SECURITY_NETWORK */ + /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock @@ -542,6 +559,14 @@ static struct proto unix_proto = { .obj_size = sizeof(struct unix_sock), }; +/* + * AF_UNIX sockets do not interact with hardware, hence they + * dont trigger interrupts - so it's safe for them to have + * bh-unsafe locking for their sk_receive_queue.lock. Split off + * this special lock-class by reinitializing the spinlock key: + */ +static struct lock_class_key af_unix_sk_receive_queue_lock_key; + static struct sock * unix_create1(struct socket *sock) { struct sock *sk = NULL; @@ -557,6 +582,8 @@ static struct sock * unix_create1(struct socket *sock) atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); + lockdep_set_class(&sk->sk_receive_queue.lock, + &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; @@ -630,11 +657,10 @@ static int unix_autobind(struct socket *sock) goto out; err = -ENOMEM; - addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); + addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); if (!addr) goto out; - memset(addr, 0, sizeof(*addr) + sizeof(short) + 16); addr->name->sun_family = AF_UNIX; atomic_set(&addr->refcnt, 1); @@ -1022,7 +1048,7 @@ restart: goto out_unlock; } - unix_state_wlock(sk); + unix_state_wlock_nested(sk); if (sk->sk_state != st) { unix_state_wunlock(sk); @@ -1290,6 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); + unix_get_secdata(siocb->scm, skb); skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); @@ -1570,6 +1597,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } siocb->scm->creds = *UNIXCREDS(skb); + unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -2032,10 +2060,7 @@ static int __init af_unix_init(void) int rc = -1; struct sk_buff *dummy_skb; - if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - goto out; - } + BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index b1265187b4a8..6f39faa15832 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -32,7 +32,6 @@ * ******************************************************************************/ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> @@ -371,12 +370,11 @@ static int wanpipe_listen_rcv (struct sk_buff *skb, struct sock *sk) * used by the ioctl call to read call information * and to execute commands. */ - if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) { + if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) { wanpipe_kill_sock_irq (newsk); release_device(dev); return -ENOMEM; } - memset(mbox_ptr, 0, sizeof(mbox_cmd_t)); memcpy(mbox_ptr,skb->data,skb->len); /* Register the lcn on which incoming call came @@ -508,11 +506,10 @@ static struct sock *wanpipe_alloc_socket(void) if ((sk = sk_alloc(PF_WANPIPE, GFP_ATOMIC, &wanpipe_proto, 1)) == NULL) return NULL; - if ((wan_opt = kmalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) { + if ((wan_opt = kzalloc(sizeof(struct wanpipe_opt), GFP_ATOMIC)) == NULL) { sk_free(sk); return NULL; } - memset(wan_opt, 0x00, sizeof(struct wanpipe_opt)); wp_sk(sk) = wan_opt; @@ -2012,10 +2009,9 @@ static int set_ioctl_cmd (struct sock *sk, void *arg) dev_put(dev); - if ((mbox_ptr = kmalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) + if ((mbox_ptr = kzalloc(sizeof(mbox_cmd_t), GFP_ATOMIC)) == NULL) return -ENOMEM; - memset(mbox_ptr, 0, sizeof(mbox_cmd_t)); wp_sk(sk)->mbox = mbox_ptr; wanpipe_link_driver(dev,sk); diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index c34833dc7cc1..9479659277ae 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -42,7 +42,6 @@ * Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. *****************************************************************************/ -#include <linux/config.h> #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/capability.h> #include <linux/errno.h> /* return codes */ @@ -643,18 +642,16 @@ static int wanrouter_device_new_if(struct wan_device *wandev, if (cnf->config_id == WANCONFIG_MPPP) { #ifdef CONFIG_WANPIPE_MULTPPP - pppdev = kmalloc(sizeof(struct ppp_device), GFP_KERNEL); + pppdev = kzalloc(sizeof(struct ppp_device), GFP_KERNEL); err = -ENOBUFS; if (pppdev == NULL) goto out; - memset(pppdev, 0, sizeof(struct ppp_device)); - pppdev->dev = kmalloc(sizeof(struct net_device), GFP_KERNEL); + pppdev->dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); if (pppdev->dev == NULL) { kfree(pppdev); err = -ENOBUFS; goto out; } - memset(pppdev->dev, 0, sizeof(struct net_device)); err = wandev->new_if(wandev, (struct net_device *)pppdev, cnf); dev = pppdev->dev; #else @@ -664,11 +661,10 @@ static int wanrouter_device_new_if(struct wan_device *wandev, goto out; #endif } else { - dev = kmalloc(sizeof(struct net_device), GFP_KERNEL); + dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); err = -ENOBUFS; if (dev == NULL) goto out; - memset(dev, 0, sizeof(struct net_device)); err = wandev->new_if(wandev, dev, cnf); } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index c28ba5a47209..930ea59463ad 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -20,7 +20,6 @@ * Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) *****************************************************************************/ -#include <linux/config.h> #include <linux/init.h> /* __initfunc et al. */ #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/errno.h> /* return codes */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 282ce4e40d7b..52a2726d327f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -35,7 +35,6 @@ * response */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index adfe7b8df355..47b68a301677 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -17,7 +17,6 @@ * 2000-09-04 Henner Eisen Prevent freeing a dangling skb. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index dfb80116c59f..a11837d361d2 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -17,7 +17,6 @@ * 2002/10/06 Arnaldo Carvalho de Melo seq_file support */ -#include <linux/config.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 6c5d37517035..2a3fe986b245 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -17,7 +17,6 @@ * X.25 001 Jonathan Naylor Started coding. */ -#include <linux/config.h> #include <linux/if_arp.h> #include <linux/init.h> #include <net/x25.h> diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0c1c04322baf..0faab6332586 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -6,14 +6,24 @@ config XFRM depends on NET config XFRM_USER - tristate "IPsec user configuration interface" + tristate "Transformation user configuration interface" depends on INET && XFRM ---help--- - Support for IPsec user configuration interface used - by native Linux tools. + Support for Transformation(XFRM) user configuration interface + like IPsec used by native Linux tools. If unsure, say Y. +config XFRM_SUB_POLICY + bool "Transformation sub policy support (EXPERIMENTAL)" + depends on XFRM && EXPERIMENTAL + ---help--- + Support sub policy for developers. By using sub policy with main + one, two policies can be applied to the same packet at once. + Policy which lives shorter time in kernel should be a sub. + + If unsure, say N. + config NET_KEY tristate "PF_KEY sockets" select XFRM diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa833..de3c1a625a46 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -2,6 +2,7 @@ # Makefile for the XFRM subsystem. # -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ + xfrm_input.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6ed3302312fb..5a0dbeb6bbe8 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -9,7 +9,6 @@ * any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> @@ -31,7 +30,8 @@ */ static struct xfrm_algo_desc aalg_list[] = { { - .name = "digest_null", + .name = "hmac(digest_null)", + .compat = "digest_null", .uinfo = { .auth = { @@ -48,7 +48,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "md5", + .name = "hmac(md5)", + .compat = "md5", .uinfo = { .auth = { @@ -65,7 +66,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha1", + .name = "hmac(sha1)", + .compat = "sha1", .uinfo = { .auth = { @@ -82,7 +84,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha256", + .name = "hmac(sha256)", + .compat = "sha256", .uinfo = { .auth = { @@ -99,7 +102,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "ripemd160", + .name = "hmac(ripemd160)", + .compat = "ripemd160", .uinfo = { .auth = { @@ -119,7 +123,8 @@ static struct xfrm_algo_desc aalg_list[] = { static struct xfrm_algo_desc ealg_list[] = { { - .name = "cipher_null", + .name = "ecb(cipher_null)", + .compat = "cipher_null", .uinfo = { .encr = { @@ -136,7 +141,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des", + .name = "cbc(des)", + .compat = "des", .uinfo = { .encr = { @@ -153,7 +159,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des3_ede", + .name = "cbc(des3_ede)", + .compat = "des3_ede", .uinfo = { .encr = { @@ -170,7 +177,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cast128", + .name = "cbc(cast128)", + .compat = "cast128", .uinfo = { .encr = { @@ -187,7 +195,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "blowfish", + .name = "cbc(blowfish)", + .compat = "blowfish", .uinfo = { .encr = { @@ -204,7 +213,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "aes", + .name = "cbc(aes)", + .compat = "aes", .uinfo = { .encr = { @@ -221,7 +231,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "serpent", + .name = "cbc(serpent)", + .compat = "serpent", .uinfo = { .encr = { @@ -238,7 +249,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "twofish", + .name = "cbc(twofish)", + .compat = "twofish", .uinfo = { .encr = { @@ -351,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, char *name, - int probe) + int entries, u32 type, u32 mask, + char *name, int probe) { int i, status; @@ -360,7 +372,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, return NULL; for (i = 0; i < entries; i++) { - if (strcmp(name, list[i].name)) + if (strcmp(name, list[i].name) && + (!list[i].compat || strcmp(name, list[i].compat))) continue; if (list[i].available) @@ -369,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_alg_available(name, 0); + status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC); if (!status) break; @@ -381,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) { - return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); + return xfrm_get_byname(aalg_list, aalg_entries(), + CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); + return xfrm_get_byname(ealg_list, ealg_entries(), + CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), name, probe); + return xfrm_get_byname(calg_list, calg_entries(), + CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); @@ -428,19 +447,22 @@ void xfrm_probe_algs(void) BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); + status = crypto_has_hash(aalg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (aalg_list[i].available != status) aalg_list[i].available = status; } for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); + status = crypto_has_blkcipher(ealg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (ealg_list[i].available != status) ealg_list[i].available = status; } for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); + status = crypto_has_comp(calg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (calg_list[i].available != status) calg_list[i].available = status; } @@ -472,11 +494,12 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); /* Move to common area: it is shared with AH. */ -void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update) +int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, + int offset, int len, icv_update_fn_t icv_update) { int start = skb_headlen(skb); int i, copy = start - offset; + int err; struct scatterlist sg; /* Checksum header. */ @@ -488,10 +511,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } @@ -511,10 +536,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = frag->page_offset + offset-start; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if (!(len -= copy)) - return; + return 0; offset += copy; } start = end; @@ -532,15 +559,19 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - skb_icv_walk(list, tfm, offset-start, copy, icv_update); + err = skb_icv_walk(list, desc, offset-start, + copy, icv_update); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } start = end; } } BUG_ON(len); + return 0; } EXPORT_SYMBOL_GPL(skb_icv_walk); diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 000000000000..37643bb8768a --- /dev/null +++ b/net/xfrm/xfrm_hash.c @@ -0,0 +1,41 @@ +/* xfrm_hash.c: Common hash table code. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/xfrm.h> + +#include "xfrm_hash.h" + +struct hlist_head *xfrm_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +void xfrm_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 000000000000..6ac4e4f033ac --- /dev/null +++ b/net/xfrm/xfrm_hash.h @@ -0,0 +1,128 @@ +#ifndef _XFRM_HASH_H +#define _XFRM_HASH_H + +#include <linux/xfrm.h> +#include <linux/socket.h> + +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a4); +} + +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a6[2] ^ addr->a6[3]); +} + +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, + u32 reqid, unsigned short family, + unsigned int hmask) +{ + unsigned int h = family ^ reqid; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(saddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(saddr); + break; + }; + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, + unsigned int hmask) +{ + unsigned int h = (__force u32)spi ^ proto; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(daddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(daddr); + break; + } + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); +extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); + +#endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 891a6090cc09..e8198a2c785d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(secpath_dup); /* Fetch spi and seq from ipsec header */ -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { int offset, offset_seq; @@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; - *spi = htonl(ntohs(*(u16*)(skb->h.raw + 2))); + *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2))); *seq = 0; return 0; default: @@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) if (!pskb_may_pull(skb, 16)) return -EINVAL; - *spi = *(u32*)(skb->h.raw + offset); - *seq = *(u32*)(skb->h.raw + offset_seq); + *spi = *(__be32*)(skb->h.raw + offset); + *seq = *(__be32*)(skb->h.raw + offset_seq); return 0; } EXPORT_SYMBOL(xfrm_parse_spi); @@ -82,8 +82,6 @@ void __init xfrm_input_init(void) { secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!secpath_cachep) - panic("XFRM: failed to allocate secpath_cache\n"); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b8936926c24b..b6e2e79d7261 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/list.h> @@ -23,16 +22,19 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/module.h> +#include <linux/cache.h> #include <net/xfrm.h> #include <net/ip.h> +#include "xfrm_hash.h" + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); +unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -40,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); +static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); @@ -308,12 +309,13 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) { struct xfrm_policy *policy; - policy = kmalloc(sizeof(struct xfrm_policy), gfp); + policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - memset(policy, 0, sizeof(struct xfrm_policy)); - atomic_set(&policy->refcnt, 1); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); + atomic_set(&policy->refcnt, 1); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -359,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) static void xfrm_policy_gc_task(void *data) { struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); + gc_list.first = xfrm_policy_gc_list.first; + INIT_HLIST_HEAD(&xfrm_policy_gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); - } } /* Rule must be locked. Release descentant resources, announce @@ -391,70 +392,275 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); + hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + +static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; +static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; +static struct hlist_head *xfrm_policy_byidx __read_mostly; +static unsigned int xfrm_idx_hmask __read_mostly; +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int idx_hash(u32 index) +{ + return __idx_hash(index, xfrm_idx_hmask); +} + +static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? + &xfrm_policy_inexact[dir] : + xfrm_policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + + return xfrm_policy_bydst[dir].table + hash; +} + +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + hlist_add_head(&pol->bydst, ndsttable+h); + } +} + +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) +{ + return ((old_hmask + 1) << 1) - 1; +} + +static void xfrm_bydst_resize(int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_hash_alloc(nsize); + int i; + + if (!ndst) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + xfrm_policy_bydst[dir].table = ndst; + xfrm_policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *nidx = xfrm_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + xfrm_policy_byidx = nidx; + xfrm_idx_hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(int dir, int *total) +{ + unsigned int cnt = xfrm_policy_count[dir]; + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + + if ((hmask + 1) < xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(dir, &total)) + xfrm_bydst_resize(dir); + } + if (xfrm_byidx_should_resize(total)) + xfrm_byidx_resize(total); + + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { + struct hlist_node *entry; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { - if (p->index == idx) + list = xfrm_policy_byidx + idx_hash(idx); + found = 0; + hlist_for_each_entry(p, entry, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos, *last; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + chain = policy_hash_bysel(&policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + last = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (!delpol && + pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + last = &pol->bydst; continue; } if (!newpos) - newpos = p; + newpos = &pol->bydst; if (delpol) break; - p = &pol->next; + last = &pol->bydst; } + if (!newpos) + newpos = last; if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; + xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + if (delpol) { + hlist_del(&delpol->bydst); + hlist_del(&delpol->byidx); + xfrm_policy_count[dir]--; + } + policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); + hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -463,10 +669,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); + else if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { + entry = &policy->bydst; + hlist_for_each_entry_continue(policy, entry, bydst) { struct dst_entry *dst; write_lock(&policy->lock); @@ -495,87 +704,146 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + chain = policy_hash_bysel(sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == type && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + chain = xfrm_policy_byidx + idx_hash(id); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, byidx) { + if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type) { - struct xfrm_policy *xp; int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + again1: + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(xp); + xfrm_policy_kill(pol); write_lock_bh(&xfrm_policy_lock); + goto again1; + } + + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(pol); + + write_lock_bh(&xfrm_policy_lock); + goto again2; + } } + + xfrm_policy_count[dir] = 0; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; + struct xfrm_policy *pol; + struct hlist_node *entry; + int dir, count, error; read_lock_bh(&xfrm_policy_lock); + count = 0; for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) - count++; + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type == type) + count++; + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type == type) + count++; + } + } } if (count == 0) { @@ -584,13 +852,28 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); if (error) goto out; } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } } - + error = 0; out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -599,29 +882,79 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, + u8 type, u16 family, int dir) { - struct xfrm_policy *pol; + struct xfrm_selector *sel = &pol->selector; + int match; - read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; + if (pol->family != family || + pol->type != type) + return 0; - if (pol->family != family) - continue; + match = xfrm_selector_match(sel, fl, family); + if (match) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) + return 1; + } - match = xfrm_selector_match(sel, fl, family); + return 0; +} - if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { - xfrm_pol_hold(pol); - break; - } +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) +{ + struct xfrm_policy *pol, *ret; + xfrm_address_t *daddr, *saddr; + struct hlist_node *entry; + struct hlist_head *chain; + u32 priority = ~0U; + + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { + ret = pol; + priority = ret->priority; + break; } } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir) && + pol->priority < priority) { + ret = pol; + break; + } + } + if (ret) + xfrm_pol_hold(ret); read_unlock_bh(&xfrm_policy_lock); + + return ret; +} + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + +#ifdef CONFIG_XFRM_SUB_POLICY +end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -643,7 +976,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -654,7 +987,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -667,24 +1000,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct hlist_head *chain = policy_hash_bysel(&pol->selector, + pol->family, dir); + + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + xfrm_policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + if (hlist_unhashed(&pol->bydst)) + return NULL; - for (polp = &xfrm_policy_list[dir]; - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -706,12 +1044,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -740,6 +1083,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -763,17 +1107,32 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -781,9 +1140,15 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + if (xfrm_addr_any(local, family)) { + error = xfrm_get_saddr(&tmp, remote, family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); @@ -811,6 +1176,45 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(tpp[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -857,6 +1261,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -864,19 +1273,26 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + restart: genid = atomic_read(&flow_cache_genid); policy = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || + !xfrm_policy_count[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -885,6 +1301,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -893,11 +1312,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -913,7 +1334,36 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -926,7 +1376,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -934,7 +1384,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -944,7 +1394,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -958,8 +1408,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -979,17 +1435,34 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +{ + struct xfrm_state *x; + int err; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + xfrm_state_hold(x); + err = x->type->reject(x, skb, fl); + xfrm_state_put(x); + return err; +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -1006,10 +1479,19 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<<x->props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + ((tmpl->aalgos & (1<<x->props.aalgo)) || + !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. + */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -1017,15 +1499,18 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } @@ -1034,21 +1519,25 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_tunnel(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + *idxp = k; return 1; + } } return 0; @@ -1058,16 +1547,18 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; + int xerr_idx = -1; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. */ if (skb->sp) { int i; @@ -1081,46 +1572,90 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (!pol) - return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + return 0; + } + return 1; + } pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); + if (k < 0) { + if (k < -1) + /* "-2 - errored_index" returned */ + xerr_idx = -(2+k); goto reject; + } } - if (secpath_has_tunnel(sp, k)) + if (secpath_has_nontransport(sp, k, &xerr_idx)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: - xfrm_pol_put(pol); + xfrm_secpath_reject(xerr_idx, skb, &fl); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1136,18 +1671,39 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } EXPORT_SYMBOL(__xfrm_route_forward); +/* Optimize later using cookies and generation ids. */ + static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* If it is marked obsolete, which is how we even get here, - * then we have purged it from the policy bundle list and we - * did that for a good reason. + /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + * to "-1" to force all XFRM destinations to get validated by + * dst_ops->check on every use. We do this because when a + * normal route referenced by an XFRM dst is obsoleted we do + * not go looking around for all parent referencing XFRM dsts + * so that we can invalidate them. It is just too much work. + * Instead we make the checks here on every use. For example: + * + * XFRM dst A --> IPv4 dst X + * + * X is the "xdst->route" of A (X is also the "dst->path" of A + * in this example). If X is marked obsolete, "A" will not + * notice. That's what we are validating here via the + * stale_bundle() check. + * + * When a policy's bundle is pruned, we dst_free() the XFRM + * dst which causes it's ->obsolete field to be set to a + * positive non-zero integer. If an XFRM dst has been pruned + * like this, we want to force a new route lookup. */ + if (dst->obsolete < 0 && !stale_bundle(dst)) + return dst; + return NULL; } static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1177,33 +1733,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } +static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +{ + struct dst_entry *dst, **dstp; + + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = *gc_list_p; + *gc_list_p = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); +} + static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; + struct dst_entry *gc_list = NULL; + int dir; read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + struct hlist_head *table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) + prune_one_bundle(pol, func, &gc_list); + + table = xfrm_policy_bydst[dir].table; + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) + prune_one_bundle(pol, func, &gc_list); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - dst = gc_list; + struct dst_entry *dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1219,22 +1792,12 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -1262,7 +1825,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1279,8 +1842,16 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; + + if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + return 0; mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { @@ -1429,12 +2000,33 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { + unsigned int hmask, sz; + int dir; + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); + + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + xfrm_policy_byidx = xfrm_hash_alloc(sz); + xfrm_idx_hmask = hmask; + if (!xfrm_policy_byidx) + panic("XFRM: failed to allocate byidx hash\n"); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + + htab = &xfrm_policy_bydst[dir]; + htab->table = xfrm_hash_alloc(sz); + htab->hmask = hmask; + if (!htab->table) + panic("XFRM: failed to allocate bydst hash\n"); + } INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); register_netdevice_notifier(&xfrm_dev_notifier); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 17b29ec3c417..f927b7330f02 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,8 +18,11 @@ #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/module.h> +#include <linux/cache.h> #include <asm/uaccess.h> +#include "xfrm_hash.h" + struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -32,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by daddr to find what SAs exist for given + 2. Hash table by (daddr,family,reqid) to find what SAs exist for given destination/tunnel endpoint. (output) */ @@ -44,8 +47,126 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head *xfrm_state_bydst __read_mostly; +static struct hlist_head *xfrm_state_bysrc __read_mostly; +static struct hlist_head *xfrm_state_byspi __read_mostly; +static unsigned int xfrm_state_hmask __read_mostly; +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; + +static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, + u32 reqid, + unsigned short family) +{ + return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); +} + +static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, + unsigned short family) +{ + return __xfrm_src_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int +xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family, + nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->props.saddr, x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + if (x->id.spi) { + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, + x->id.proto, x->props.family, + nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } + } +} + +static unsigned long xfrm_hash_new_size(void) +{ + return ((xfrm_state_hmask + 1) << 1) * + sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(); + ndst = xfrm_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_hash_alloc(nsize); + if (!nsrc) { + xfrm_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = xfrm_hash_alloc(nsize); + if (!nspi) { + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = xfrm_state_hmask; i >= 0; i--) + xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = xfrm_state_bydst; + osrc = xfrm_state_bysrc; + ospi = xfrm_state_byspi; + ohashmask = xfrm_state_hmask; + + xfrm_state_bydst = ndst; + xfrm_state_bysrc = nsrc; + xfrm_state_byspi = nspi; + xfrm_state_hmask = nhashmask; + + spin_unlock_bh(&xfrm_state_lock); + + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); + +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -54,11 +175,9 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -69,14 +188,13 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); static void xfrm_state_gc_destroy(struct xfrm_state *x) { - if (del_timer(&x->timer)) - BUG(); - if (del_timer(&x->rtimer)) - BUG(); + del_timer_sync(&x->timer); + del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -90,22 +208,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); - - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + gc_list.first = xfrm_state_gc_list.first; + INIT_HLIST_HEAD(&xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) xfrm_state_gc_destroy(x); - } + wake_up(&km_waitq); } @@ -168,9 +281,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - xfrm_state_hold(x); + if (next != LONG_MAX) + mod_timer(&x->timer, jiffies + make_jiffies(next)); + goto out; expired: @@ -185,7 +298,6 @@ expired: out: spin_unlock(&x->lock); - xfrm_state_put(x); } static void xfrm_replay_timer_handler(unsigned long data); @@ -194,14 +306,14 @@ struct xfrm_state *xfrm_state_alloc(void) { struct xfrm_state *x; - x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC); + x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { - memset(x, 0, sizeof(struct xfrm_state)); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->byspi); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -226,7 +338,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -239,27 +351,12 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - list_del(&x->bydst); - __xfrm_state_put(x); - if (x->id.spi) { - list_del(&x->byspi); - __xfrm_state_put(x); - } + hlist_del(&x->bydst); + hlist_del(&x->bysrc); + if (x->id.spi) + hlist_del(&x->byspi); + xfrm_state_num--; spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - __xfrm_state_put(x); - if (del_timer(&x->rtimer)) - __xfrm_state_put(x); - - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that @@ -288,14 +385,15 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; - struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { + struct hlist_node *entry; + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); @@ -326,29 +424,103 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } +static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + struct xfrm_state *x; + struct hlist_node *entry; + + hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(saddr, family); + struct xfrm_state *x; + struct hlist_node *entry; + + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6) || + !ipv6_addr_equal((struct in6_addr *)saddr, + (struct in6_addr *) + x->props.saddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + if (use_spi) + return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(&x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); + struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; - } - spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && @@ -368,7 +540,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -380,7 +552,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -389,8 +561,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { + (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto, family)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -404,17 +576,24 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); } else { @@ -430,59 +609,167 @@ out: else *err = acquire_in_progress ? -EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h; - list_add(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); + x->genid = ++xfrm_state_genid; - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); + h = xfrm_src_hash(&x->props.saddr, x->props.family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - if (!mod_timer(&x->timer, jiffies + HZ)) - xfrm_state_hold(x); + if (x->id.spi) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family); - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - xfrm_state_hold(x); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); + } + + mod_timer(&x->timer, jiffies + HZ); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); wake_up(&km_waitq); + + xfrm_state_num++; + + if (x->bydst.next != NULL && + (xfrm_state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > xfrm_state_hmask) + schedule_work(&xfrm_hash_work); +} + +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && + !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) + x->genid = xfrm_state_genid; + } } void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); +/* xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +{ + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr) || + !ipv6_addr_equal((struct in6_addr *) + x->props.saddr.a6, + (struct in6_addr *)saddr)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, + (struct in6_addr *)daddr); + ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, + (struct in6_addr *)saddr); + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, + (struct in6_addr *)saddr); + ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr); + break; + }; + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + wake_up(&km_waitq); + } + + return x; +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -490,7 +777,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -498,20 +785,17 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + if (use_spi && !x1) + x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x->id.proto, + &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); - - if (!err) - xfrm_flush_all_bundles(); if (x1) { xfrm_state_delete(x1); @@ -524,16 +808,12 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; - - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) @@ -553,7 +833,6 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -569,11 +848,15 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - if (!mod_timer(&x1->timer, jiffies + HZ)) - xfrm_state_hold(x1); + mod_timer(&x1->timer, jiffies + HZ); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -598,8 +881,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - if (!mod_timer(&x->timer, jiffies)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies); return -EINVAL; } @@ -637,50 +919,97 @@ err: EXPORT_SYMBOL(xfrm_state_check); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, +xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); + x = __xfrm_state_lookup(daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); + spin_unlock_bh(&xfrm_state_lock); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + +struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); + spin_unlock_bh(&xfrm_state_lock); + + return x; +} +EXPORT_SYMBOL(xfrm_find_acq); + +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) - return NULL; + return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); spin_unlock_bh(&xfrm_state_lock); xfrm_state_put_afinfo(afinfo); - return x; + return err; } -EXPORT_SYMBOL(xfrm_find_acq); +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - struct xfrm_state *x; - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + for (i = 0; i <= xfrm_state_hmask; i++) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -714,9 +1043,9 @@ u32 xfrm_get_acqseq(void) EXPORT_SYMBOL(xfrm_get_acqseq); void -xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) +xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) { - u32 h; + unsigned int h; struct xfrm_state *x0; if (x->id.spi) @@ -731,10 +1060,10 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) x->id.spi = minspi; } else { u32 spi = 0; - minspi = ntohl(minspi); - maxspi = ntohl(maxspi); - for (h=0; h<maxspi-minspi+1; h++) { - spi = minspi + net_random()%(maxspi-minspi+1); + u32 low = ntohl(minspi); + u32 high = ntohl(maxspi); + for (h=0; h<high-low+1; h++) { + spi = low + net_random()%(high-low+1); x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); @@ -746,8 +1075,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); } @@ -759,13 +1087,14 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; + struct hlist_node *entry; int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + for (i = 0; i <= xfrm_state_hmask; i++) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (xfrm_id_proto_match(x->id.proto, proto)) count++; } } @@ -774,9 +1103,9 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + for (i = 0; i <= xfrm_state_hmask; i++) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); if (err) @@ -833,10 +1162,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) km_state_notify(x, &c); if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { - xfrm_state_hold(x); + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; - } } EXPORT_SYMBOL(xfrm_replay_notify); @@ -854,14 +1181,12 @@ static void xfrm_replay_timer_handler(unsigned long data) } spin_unlock(&x->lock); - xfrm_state_put(x); } -int xfrm_replay_check(struct xfrm_state *x, u32 seq) +int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq) { u32 diff; - - seq = ntohl(seq); + u32 seq = ntohl(net_seq); if (unlikely(seq == 0)) return -EINVAL; @@ -883,11 +1208,10 @@ int xfrm_replay_check(struct xfrm_state *x, u32 seq) } EXPORT_SYMBOL(xfrm_replay_check); -void xfrm_replay_advance(struct xfrm_state *x, u32 seq) +void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) { u32 diff; - - seq = ntohl(seq); + u32 seq = ntohl(net_seq); if (seq > x->replay.seq) { diff = seq - x->replay.seq; @@ -998,6 +1322,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; @@ -1019,7 +1362,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; @@ -1066,11 +1409,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bydst = xfrm_state_bydst; - afinfo->state_byspi = xfrm_state_byspi; + else xfrm_state_afinfo[afinfo->family] = afinfo; - } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1087,11 +1427,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { + else xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bydst = NULL; - } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; @@ -1164,8 +1501,6 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) return res; } -EXPORT_SYMBOL(xfrm_state_mtu); - int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; @@ -1209,12 +1544,17 @@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - int i; + unsigned int sz; + + sz = sizeof(struct hlist_head) * 8; + + xfrm_state_bydst = xfrm_hash_alloc(sz); + xfrm_state_bysrc = xfrm_hash_alloc(sz); + xfrm_state_byspi = xfrm_hash_alloc(sz); + if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) + panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - for (i=0; i<XFRM_DST_HSIZE; i++) { - INIT_LIST_HEAD(&xfrm_state_bydst[i]); - INIT_LIST_HEAD(&xfrm_state_byspi[i]); - } INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c21dc26141ea..c59a78d2923a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -10,6 +10,7 @@ * */ +#include <linux/crypto.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> @@ -27,6 +28,9 @@ #include <net/xfrm.h> #include <net/netlink.h> #include <asm/uaccess.h> +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include <linux/in6.h> +#endif static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -86,6 +90,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } +static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct rtattr *rt = xfrma[type - 1]; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) + return -EINVAL; + + if (addrp) + *addrp = RTA_DATA(rt); + + return 0; +} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -156,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + if (xfrma[XFRMA_ALG_COMP-1] || + xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1] || + xfrma[XFRMA_ENCAP-1] || + xfrma[XFRMA_SEC_CTX-1] || + !xfrma[XFRMA_COADDR-1]) + goto out; + break; +#endif + default: goto out; }; @@ -170,11 +203,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; + if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + goto out; err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: break; default: @@ -212,6 +248,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOMEM; memcpy(p, ualg, len); + strcpy(p->alg_name, algo->name); *algpp = p; return 0; } @@ -258,6 +295,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return security_xfrm_state_alloc(x, uctx); } +static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + xfrm_address_t *p, *uaddrp; + + if (!rta) + return 0; + + uaddrp = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uaddrp, sizeof(*p)); + *addrpp = p; + return 0; +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -347,7 +402,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - + if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + goto error; err = xfrm_init_state(x); if (err) goto error; @@ -416,16 +472,48 @@ out: return err; } +static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = NULL; + int err; + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); + if (err) + goto out; + + if (!saddr) { + err = -EINVAL; + goto out; + } + + x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, + p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err; + int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) - return -ESRCH; + return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -519,6 +607,13 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } + + if (x->coaddr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + + if (x->lastused) + RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; @@ -540,7 +635,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); + (void) xfrm_state_walk(0, dump_one_state, &info); cb->args[0] = info.this_idx; return skb->len; @@ -576,10 +671,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) goto out_noput; @@ -692,6 +786,22 @@ static int verify_policy_dir(__u8 dir) return 0; } +static int verify_policy_type(__u8 type) +{ + switch (type) { + case XFRM_POLICY_TYPE_MAIN: +#ifdef CONFIG_XFRM_SUB_POLICY + case XFRM_POLICY_TYPE_SUB: +#endif + break; + + default: + return -EINVAL; + }; + + return 0; +} + static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { switch (p->share) { @@ -785,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) return 0; } +static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct xfrm_userpolicy_type *upt; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + if (rt) { + if (rt->rta_len < sizeof(*upt)) + return -EINVAL; + + upt = RTA_DATA(rt); + type = upt->type; + } + + err = verify_policy_type(type); + if (err) + return err; + + *tp = type; + return 0; +} + static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -823,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); + err = copy_from_user_policy_type(&xp->type, xfrma); + if (err) + goto error; + if (!(err = copy_from_user_tmpl(xp, xfrma))) err = copy_from_user_sec_ctx(xp, xfrma); - - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } + if (err) + goto error; return xp; + error: + *errp = err; + kfree(xp); + return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) @@ -909,27 +1046,63 @@ rtattr_failure: return -1; } -static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) { - if (xp->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - xp->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); + return 0; - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = xp->security->ctx_doi; - uctx->ctx_alg = xp->security->ctx_alg; - uctx->ctx_len = xp->security->ctx_len; - memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); + rtattr_failure: + return -1; +} + +static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) +{ + if (x->security) { + return copy_sec_ctx(x->security, skb); } return 0; +} - rtattr_failure: +static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +{ + if (xp->security) { + return copy_sec_ctx(xp->security, skb); + } + return 0; +} + +#ifdef CONFIG_XFRM_SUB_POLICY +static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + struct xfrm_userpolicy_type upt; + + memset(&upt, 0, sizeof(upt)); + upt.type = xp->type; + + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + + return 0; + +rtattr_failure: return -1; } +#else +static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + return 0; +} +#endif + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -953,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -974,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); +#ifdef CONFIG_XFRM_SUB_POLICY + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); +#endif cb->args[0] = info.this_idx; return skb->len; @@ -1010,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1017,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + err = verify_policy_dir(p->dir); if (err) return err; if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, delete); + xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1039,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } if (xp == NULL) @@ -1222,9 +1405,16 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { -struct km_event c; + struct km_event c; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; - xfrm_policy_flush(); + xfrm_policy_flush(type); + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; @@ -1237,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, 0); + xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1257,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); security_xfrm_policy_free(&tmp); } @@ -1384,6 +1579,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -1435,7 +1631,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (security_netlink_recv(skb)) { + if (security_netlink_recv(skb, CAP_NET_ADMIN)) { *errp = -EPERM; return -1; } @@ -1708,7 +1904,9 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) + if (copy_to_user_state_sec_ctx(x, skb)) + goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -1742,7 +1940,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1750,7 +1948,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1790,8 +1988,18 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, } copy_from_user_policy(xp, p); + xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; @@ -1814,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -1885,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -1902,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_userpolicy_type upt; +#endif int len = NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); @@ -1911,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); + nlh->nlmsg_flags = 0; + +#ifdef CONFIG_XFRM_SUB_POLICY + memset(&upt, 0, sizeof(upt)); + upt.type = c->data.type; + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); +#endif nlh->nlmsg_len = skb->tail - b; @@ -1918,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: +#ifdef CONFIG_XFRM_SUB_POLICY +rtattr_failure: +#endif kfree_skb(skb); return -1; } @@ -1942,19 +2167,64 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); + ur = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, + xfrm_address_t *addr) +{ + struct sk_buff *skb; + size_t len; + + len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, }; static int __init xfrm_user_init(void) { struct sock *nlsk; - printk(KERN_INFO "Initializing IPsec netlink socket\n"); + printk(KERN_INFO "Initializing XFRM netlink socket\n"); nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, THIS_MODULE); |