diff options
Diffstat (limited to 'net')
186 files changed, 4486 insertions, 2791 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 52077ca22072..6e64f7c6a2e9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } - new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name, - vlan_setup, real_dev->num_tx_queues); + new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); if (new_dev == NULL) return -ENOBUFS; - netif_copy_real_num_queues(new_dev, real_dev); dev_net_set(new_dev, net); /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. @@ -334,12 +332,15 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; vlandev->gso_max_size = dev->gso_max_size; + + if (dev->features & NETIF_F_HW_VLAN_TX) + vlandev->hard_header_len = dev->hard_header_len; + else + vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; + #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif - vlandev->real_num_tx_queues = dev->real_num_tx_queues; - BUG_ON(vlandev->real_num_tx_queues > vlandev->num_tx_queues); - if (old_features != vlandev->features) netdev_features_change(vlandev); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index db01b3181fdc..5687c9b95f33 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping { /** - * struct vlan_rx_stats - VLAN percpu rx stats + * struct vlan_pcpu_stats - VLAN percpu rx/tx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets + * @tx_packets: number of transmitted packets + * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters - * @rx_errors: number of errors + * @rx_errors: number of rx errors + * @tx_dropped: number of tx drops */ -struct vlan_rx_stats { +struct vlan_pcpu_stats { u64 rx_packets; u64 rx_bytes; u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; struct u64_stats_sync syncp; - unsigned long rx_errors; + u32 rx_errors; + u32 tx_dropped; }; /** @@ -45,9 +51,7 @@ struct vlan_rx_stats { * @real_dev: underlying netdevice * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry - * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX - * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX - * @vlan_rx_stats: ptr to percpu rx stats + * @vlan_pcpu_stats: ptr to percpu rx stats */ struct vlan_dev_info { unsigned int nr_ingress_mappings; @@ -62,9 +66,7 @@ struct vlan_dev_info { unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; - unsigned long cnt_inc_headroom_on_tx; - unsigned long cnt_encap_on_xmit; - struct vlan_rx_stats __percpu *vlan_rx_stats; + struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; }; static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 69b2f79800a5..ce8e3ab3e7a5 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp) struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; - struct vlan_rx_stats *rx_stats; + struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); if (!vlan_dev) { @@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp) skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; - rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats); + rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 14e3d1fa07a0..be737539f34d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; - struct vlan_rx_stats *rx_stats; + struct vlan_pcpu_stats *rx_stats; struct net_device *vlan_dev; u16 vlan_id; u16 vlan_tci; @@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, } else { skb->dev = vlan_dev; - rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats); + rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; @@ -274,9 +274,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, u16 vlan_tci = 0; int rc; - if (WARN_ON(skb_headroom(skb) < dev->hard_header_len)) - return -ENOSPC; - if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); @@ -313,8 +310,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -326,71 +321,31 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { - unsigned int orig_headroom = skb_headroom(skb); u16 vlan_tci; - - vlan_dev_info(dev)->cnt_encap_on_xmit++; - vlan_tci = vlan_dev_info(dev)->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_put_tag(skb, vlan_tci); - if (!skb) { - txq->tx_dropped++; - return NETDEV_TX_OK; - } - - if (orig_headroom < VLAN_HLEN) - vlan_dev_info(dev)->cnt_inc_headroom_on_tx++; + skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + struct vlan_pcpu_stats *stats; - return ret; -} - -static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - int i = skb_get_queue_mapping(skb); - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - u16 vlan_tci; - unsigned int len; - int ret; - - vlan_tci = vlan_dev_info(dev)->vlan_id; - vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); - skb = __vlan_hwaccel_put_tag(skb, vlan_tci); - - skb->dev = vlan_dev_info(dev)->real_dev; - len = skb->len; - ret = dev_queue_xmit(skb); - - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - txq->tx_packets++; - txq->tx_bytes += len; - } else - txq->tx_dropped++; + stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_begin(&stats->syncp); + } else { + this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); + } return ret; } -static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb) -{ - struct net_device *rdev = vlan_dev_info(dev)->real_dev; - const struct net_device_ops *ops = rdev->netdev_ops; - - return ops->ndo_select_queue(rdev, skb); -} - static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) { /* TODO: gotta make sure the underlying layer can handle it, @@ -719,8 +674,7 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; -static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops, - vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq; +static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { @@ -738,6 +692,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->features |= real_dev->features & real_dev->vlan_features; + dev->features |= NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ @@ -755,26 +710,20 @@ static int vlan_dev_init(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; - if (real_dev->netdev_ops->ndo_select_queue) - dev->netdev_ops = &vlan_netdev_accel_ops_sq; - else - dev->netdev_ops = &vlan_netdev_accel_ops; } else { dev->header_ops = &vlan_header_ops; dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; - if (real_dev->netdev_ops->ndo_select_queue) - dev->netdev_ops = &vlan_netdev_ops_sq; - else - dev->netdev_ops = &vlan_netdev_ops; } + dev->netdev_ops = &vlan_netdev_ops; + if (is_vlan_dev(real_dev)) subclass = 1; vlan_dev_set_lockdep_class(dev, subclass); - vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats); - if (!vlan_dev_info(dev)->vlan_rx_stats) + vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + if (!vlan_dev_info(dev)->vlan_pcpu_stats) return -ENOMEM; return 0; @@ -786,8 +735,8 @@ static void vlan_dev_uninit(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); int i; - free_percpu(vlan->vlan_rx_stats); - vlan->vlan_rx_stats = NULL; + free_percpu(vlan->vlan_pcpu_stats); + vlan->vlan_pcpu_stats = NULL; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { while ((pm = vlan->egress_priority_map[i]) != NULL) { vlan->egress_priority_map[i] = pm->next; @@ -825,33 +774,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - dev_txq_stats_fold(dev, stats); - if (vlan_dev_info(dev)->vlan_rx_stats) { - struct vlan_rx_stats *p, accum = {0}; + if (vlan_dev_info(dev)->vlan_pcpu_stats) { + struct vlan_pcpu_stats *p; + u32 rx_errors = 0, tx_dropped = 0; int i; for_each_possible_cpu(i) { - u64 rxpackets, rxbytes, rxmulticast; + u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; unsigned int start; - p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); + p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i); do { start = u64_stats_fetch_begin_bh(&p->syncp); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; rxmulticast = p->rx_multicast; + txpackets = p->tx_packets; + txbytes = p->tx_bytes; } while (u64_stats_fetch_retry_bh(&p->syncp, start)); - accum.rx_packets += rxpackets; - accum.rx_bytes += rxbytes; - accum.rx_multicast += rxmulticast; - /* rx_errors is ulong, not protected by syncp */ - accum.rx_errors += p->rx_errors; + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; + stats->multicast += rxmulticast; + stats->tx_packets += txpackets; + stats->tx_bytes += txbytes; + /* rx_errors & tx_dropped are u32 */ + rx_errors += p->rx_errors; + tx_dropped += p->tx_dropped; } - stats->rx_packets = accum.rx_packets; - stats->rx_bytes = accum.rx_bytes; - stats->rx_errors = accum.rx_errors; - stats->multicast = accum.rx_multicast; + stats->rx_errors = rx_errors; + stats->tx_dropped = tx_dropped; } return stats; } @@ -908,80 +861,6 @@ static const struct net_device_ops vlan_netdev_ops = { #endif }; -static const struct net_device_ops vlan_netdev_accel_ops = { - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - -static const struct net_device_ops vlan_netdev_ops_sq = { - .ndo_select_queue = vlan_dev_select_queue, - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - -static const struct net_device_ops vlan_netdev_accel_ops_sq = { - .ndo_select_queue = vlan_dev_select_queue, - .ndo_change_mtu = vlan_dev_change_mtu, - .ndo_init = vlan_dev_init, - .ndo_uninit = vlan_dev_uninit, - .ndo_open = vlan_dev_open, - .ndo_stop = vlan_dev_stop, - .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = vlan_dev_set_mac_address, - .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, - .ndo_change_rx_flags = vlan_dev_change_rx_flags, - .ndo_do_ioctl = vlan_dev_ioctl, - .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, - .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, - .ndo_fcoe_enable = vlan_dev_fcoe_enable, - .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, -#endif -}; - void vlan_setup(struct net_device *dev) { ether_setup(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index ddc105734af7..be9a5c19a775 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev, return 0; } -static int vlan_get_tx_queues(struct net *net, - struct nlattr *tb[], - unsigned int *num_tx_queues, - unsigned int *real_num_tx_queues) -{ - struct net_device *real_dev; - - if (!tb[IFLA_LINK]) - return -EINVAL; - - real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - - *num_tx_queues = real_dev->num_tx_queues; - *real_num_tx_queues = real_dev->real_num_tx_queues; - return 0; -} - static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .maxtype = IFLA_VLAN_MAX, .policy = vlan_policy, .priv_size = sizeof(struct vlan_dev_info), - .get_tx_queues = vlan_get_tx_queues, .setup = vlan_setup, .validate = vlan_validate, .newlink = vlan_newlink, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 80e280f56686..d1314cf18adf 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -280,7 +280,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - static const char fmt[] = "%30s %12lu\n"; static const char fmt64[] = "%30s %12llu\n"; int i; @@ -299,10 +298,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) seq_puts(seq, "\n"); seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); - seq_printf(seq, fmt, "total headroom inc", - dev_info->cnt_inc_headroom_on_tx); - seq_printf(seq, fmt, "total encap on xmit", - dev_info->cnt_encap_on_xmit); seq_printf(seq, "Device: %s", dev_info->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, "\nINGRESS priority mappings: " diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 45c15f491401..798beac7f100 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -27,31 +27,16 @@ #include <linux/module.h> #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/stddef.h> #include <linux/types.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "protocol.h" -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef offset_of -#define offset_of(type, memb) \ - ((unsigned long)(&((type *)0)->memb)) -#endif -#ifndef container_of -#define container_of(obj, type, memb) \ - ((type *)(((char *)obj) - offset_of(type, memb))) -#endif - static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); @@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free); static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { - size_t len = MIN(pdu->size - pdu->offset, size); + size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); pdu->offset += len; return size - len; @@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); memcpy(&pdu->sdata[pdu->size], data, len); pdu->size += len; return size - len; @@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) static size_t pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) { - size_t len = MIN(pdu->capacity - pdu->size, size); + size_t len = min(pdu->capacity - pdu->size, size); if (copy_from_user(&pdu->sdata[pdu->size], udata, len)) len = 0; @@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (errcode) break; - size = MAX(len, 0); + size = max_t(int16_t, len, 0); *sptr = kmalloc(size + 1, GFP_KERNEL); if (*sptr == NULL) { @@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = - MIN(*count, - pdu->size - pdu->offset); + min_t(int32_t, *count, + pdu->size - pdu->offset); *data = &pdu->sdata[pdu->offset]; } } @@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHRT_MAX); + len = min_t(int16_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/Kconfig b/net/Kconfig index 55fd82e9ffd9..126c2af0fc1f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -220,6 +220,11 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config XPS + boolean + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + default y + menu "Network testing" config NET_PKTGEN diff --git a/net/atm/br2684.c b/net/atm/br2684.c index ad2b232a2055..fce2eae8d476 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -97,7 +97,7 @@ static LIST_HEAD(br2684_devs); static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) { - return (struct br2684_dev *)netdev_priv(net_dev); + return netdev_priv(net_dev); } static inline struct net_device *list_entry_brdev(const struct list_head *le) diff --git a/net/atm/clip.c b/net/atm/clip.c index ff956d1115bc..d257da50fcfb 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { diff --git a/net/atm/lec.c b/net/atm/lec.c index 181d70c73d70..179e04bc99dd 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -816,8 +816,7 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg) if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; - return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]), - vcc); + return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); } /* Initialize device. */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index d1e433f7d673..7ca1f46a471a 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ -bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o +bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o diff --git a/net/bridge/br.c b/net/bridge/br.c index c8436fa31344..84bbb82599b2 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,8 +22,6 @@ #include "br_private.h" -int (*br_should_route_hook)(struct sk_buff *skb); - static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; @@ -102,8 +100,6 @@ static void __exit br_deinit(void) br_fdb_fini(); } -EXPORT_SYMBOL(br_should_route_hook); - module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 90512ccfd3e9..2872393b2939 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -238,15 +238,18 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) { struct net_bridge_fdb_entry *fdb; + struct net_bridge_port *port; int ret; - if (!br_port_exists(dev)) - return 0; - rcu_read_lock(); - fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr); - ret = fdb && fdb->dst->dev != dev && - fdb->dst->state == BR_STATE_FORWARDING; + port = br_port_get_rcu(dev); + if (!port) + ret = 0; + else { + fdb = __br_fdb_get(port->br, addr); + ret = fdb && fdb->dst->dev != dev && + fdb->dst->state == BR_STATE_FORWARDING; + } rcu_read_unlock(); return ret; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index cbfe87f0f34a..2bd11ec6d166 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -223,7 +223,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct net_bridge_port_group *p; struct hlist_node *rp; - rp = rcu_dereference(br->router_list.first); + rp = rcu_dereference(hlist_first_rcu(&br->router_list)); p = mdst ? rcu_dereference(mdst->ports) : NULL; while (p || rp) { struct net_bridge_port *port, *lport, *rport; @@ -242,7 +242,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) - rp = rcu_dereference(rp->next); + rp = rcu_dereference(hlist_next_rcu(rp)); } if (!prev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 89ad25a76202..d9d1e2bac1d6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -475,11 +475,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; - if (!br_port_exists(dev)) - return -EINVAL; - - p = br_port_get(dev); - if (p->br != br) + p = br_port_get_rtnl(dev); + if (!p || p->br != br) return -EINVAL; del_nbp(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 25207a1f182b..6f6d8e1b776f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,10 @@ /* Bridge group multicast address 802.1d (pg 51). */ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +/* Hook for brouter */ +br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; +EXPORT_SYMBOL(br_should_route_hook); + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; @@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) { struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; - int (*rhook)(struct sk_buff *skb); + br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return skb; @@ -173,8 +177,8 @@ forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); - if (rhook != NULL) { - if (rhook(skb)) + if (rhook) { + if ((*rhook)(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eb5b256ffc88..326e599f83fb 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,6 +33,9 @@ #include "br_private.h" +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline int ipv6_is_local_multicast(const struct in6_addr *addr) { @@ -135,7 +138,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get( struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; if (br->multicast_disabled) @@ -235,7 +238,8 @@ static void br_multicast_group_expired(unsigned long data) if (mp->ports) goto out; - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); + hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; @@ -249,16 +253,20 @@ out: static void br_multicast_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; + + mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, &pg->addr); if (WARN_ON(!mp)) return; - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (p != pg) continue; @@ -294,10 +302,10 @@ out: spin_unlock(&br->multicast_lock); } -static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max, +static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, int elasticity) { - struct net_bridge_mdb_htable *old = *mdbp; + struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1); struct net_bridge_mdb_htable *mdb; int err; @@ -569,7 +577,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, int hash) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct hlist_node *p; unsigned count = 0; @@ -577,6 +585,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( int elasticity; int err; + mdb = rcu_dereference_protected(br->mdb, 1); hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) @@ -642,10 +651,11 @@ static struct net_bridge_mdb_entry *br_multicast_new_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) { - struct net_bridge_mdb_htable *mdb = br->mdb; + struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; int hash; + mdb = rcu_dereference_protected(br->mdb, 1); if (!mdb) { if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0)) return NULL; @@ -660,7 +670,7 @@ static struct net_bridge_mdb_entry *br_multicast_new_group( case -EAGAIN: rehash: - mdb = br->mdb; + mdb = rcu_dereference_protected(br->mdb, 1); hash = br_ip_hash(mdb, group); break; @@ -692,7 +702,7 @@ static int br_multicast_add_group(struct net_bridge *br, { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; unsigned long now = jiffies; int err; @@ -712,7 +722,9 @@ static int br_multicast_add_group(struct net_bridge *br, goto out; } - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (p->port == port) goto found; if ((unsigned long)p->port < (unsigned long)port) @@ -1106,7 +1118,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct igmpv3_query *ih3; struct net_bridge_port_group *p; - struct net_bridge_port_group **pp; + struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; __be32 group; @@ -1145,7 +1157,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip4_get(br->mdb, group); + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; @@ -1157,7 +1169,9 @@ static int br_ip4_multicast_query(struct net_bridge *br, try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) @@ -1178,7 +1192,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); struct net_bridge_mdb_entry *mp; struct mld2_query *mld2q; - struct net_bridge_port_group *p, **pp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; struct in6_addr *group = NULL; @@ -1214,7 +1229,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - mp = br_mdb_ip6_get(br->mdb, group); + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; @@ -1225,7 +1240,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); - for (pp = &mp->ports; (p = *pp); pp = &p->next) { + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) @@ -1254,7 +1271,7 @@ static void br_multicast_leave_group(struct net_bridge *br, timer_pending(&br->multicast_querier_timer)) goto out; - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, group); if (!mp) goto out; @@ -1277,7 +1294,9 @@ static void br_multicast_leave_group(struct net_bridge *br, goto out; } - for (p = mp->ports; p; p = p->next) { + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { if (p->port != port) continue; @@ -1625,7 +1644,7 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->multicast_query_timer); spin_lock_bh(&br->multicast_lock); - mdb = br->mdb; + mdb = mlock_dereference(br->mdb, br); if (!mdb) goto out; @@ -1729,6 +1748,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; int err = 0; + struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -1741,15 +1761,16 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (!netif_running(br->dev)) goto unlock; - if (br->mdb) { - if (br->mdb->old) { + mdb = mlock_dereference(br->mdb, br); + if (mdb) { + if (mdb->old) { err = -EEXIST; rollback: br->multicast_disabled = !!val; goto unlock; } - err = br_mdb_rehash(&br->mdb, br->mdb->max, + err = br_mdb_rehash(&br->mdb, mdb->max, br->hash_elasticity); if (err) goto rollback; @@ -1774,6 +1795,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) { int err = -ENOENT; u32 old; + struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (!netif_running(br->dev)) @@ -1782,7 +1804,9 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) err = -EINVAL; if (!is_power_of_2(val)) goto unlock; - if (br->mdb && val < br->mdb->size) + + mdb = mlock_dereference(br->mdb, br); + if (mdb && val < mdb->size) goto unlock; err = 0; @@ -1790,8 +1814,8 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) old = br->hash_max; br->hash_max = val; - if (br->mdb) { - if (br->mdb->old) { + if (mdb) { + if (mdb->old) { err = -EEXIST; rollback: br->hash_max = old; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 865fd7634b67..6e1392093911 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -131,17 +131,18 @@ void br_netfilter_rtable_init(struct net_bridge *br) static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { - if (!br_port_exists(dev)) - return NULL; - return &br_port_get_rcu(dev)->br->fake_rtable; + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; } static inline struct net_device *bridge_parent(const struct net_device *dev) { - if (!br_port_exists(dev)) - return NULL; + struct net_bridge_port *port; - return br_port_get_rcu(dev)->br->dev; + port = br_port_get_rcu(dev); + return port ? port->br->dev : NULL; } static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) @@ -412,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = 0, - .tos = RT_TOS(iph->tos) }, - }, - .proto = 0, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), }; struct in_device *in_dev = __in_dev_get_rcu(dev); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4a6a378c84e3..f8bf4c7f842c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -119,11 +119,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; for_each_netdev(net, dev) { + struct net_bridge_port *port = br_port_get_rtnl(dev); + /* not a bridge port */ - if (!br_port_exists(dev) || idx < cb->args[0]) + if (!port || idx < cb->args[0]) goto skip; - if (br_fill_ifinfo(skb, br_port_get(dev), + if (br_fill_ifinfo(skb, port, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) @@ -169,9 +171,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev) return -ENODEV; - if (!br_port_exists(dev)) + p = br_port_get_rtnl(dev); + if (!p) return -EINVAL; - p = br_port_get(dev); /* if kernel STP is running, don't allow changes */ if (p->br->stp_enabled == BR_KERNEL_STP) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 404d4e14c6a7..7d337c9b6082 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -32,15 +32,15 @@ struct notifier_block br_device_notifier = { static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net_bridge_port *p = br_port_get(dev); + struct net_bridge_port *p; struct net_bridge *br; int err; /* not a port of a bridge */ - if (!br_port_exists(dev)) + p = br_port_get_rtnl(dev); + if (!p) return NOTIFY_DONE; - p = br_port_get(dev); br = p->br; switch (event) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 75c90edaf7db..84aac7734bfc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -72,7 +72,7 @@ struct net_bridge_fdb_entry struct net_bridge_port_group { struct net_bridge_port *port; - struct net_bridge_port_group *next; + struct net_bridge_port_group __rcu *next; struct hlist_node mglist; struct rcu_head rcu; struct timer_list timer; @@ -86,7 +86,7 @@ struct net_bridge_mdb_entry struct hlist_node hlist[2]; struct hlist_node mglist; struct net_bridge *br; - struct net_bridge_port_group *ports; + struct net_bridge_port_group __rcu *ports; struct rcu_head rcu; struct timer_list timer; struct timer_list query_timer; @@ -151,11 +151,20 @@ struct net_bridge_port #endif }; -#define br_port_get_rcu(dev) \ - ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data)) -#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data) #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) +static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; +} + +static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev) +{ + return br_port_exists(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + struct br_cpu_netstats { u64 rx_packets; u64 rx_bytes; @@ -227,7 +236,7 @@ struct net_bridge unsigned long multicast_startup_query_interval; spinlock_t multicast_lock; - struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_htable __rcu *mdb; struct hlist_head router_list; struct hlist_head mglist; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 35cf27087b56..3d9a55d3822f 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -141,10 +141,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_bridge *br; const unsigned char *buf; - if (!br_port_exists(dev)) - goto err; - p = br_port_get_rcu(dev); - if (!pskb_may_pull(skb, 4)) goto err; @@ -153,6 +149,10 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; + p = br_port_get_rcu(dev); + if (!p) + goto err; + br = p->br; spin_lock(&br->lock); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ae3f106c3908..1bcaf36ad612 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, ebt_broute); + rcu_assign_pointer(br_should_route_hook, + (br_should_route_hook_t *)ebt_broute); return 0; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index a1dcf83f0d58..cbc9f395ab1e 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -128,6 +128,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); + const struct net_bridge_port *p; __be16 ethproto; int verdict, i; @@ -148,13 +149,11 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; /* rcu_read_lock()ed by nf_hook_slow */ - if (in && br_port_exists(in) && - FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev), - EBT_ILOGICALIN)) + if (in && (p = br_port_get_rcu(in)) != NULL && + FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) return 1; - if (out && br_port_exists(out) && - FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev), - EBT_ILOGICALOUT)) + if (out && (p = br_port_get_rcu(out)) != NULL && + FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { diff --git a/net/caif/Makefile b/net/caif/Makefile index f87481fb0e65..9d38e406e4a4 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -1,8 +1,6 @@ -ifeq ($(CONFIG_CAIF_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG -caif-objs := caif_dev.o \ +caif-y := caif_dev.o \ cfcnfg.o cfmuxl.o cfctrl.o \ cffrml.o cfveil.o cfdbgl.o\ cfserl.o cfdgml.o \ @@ -13,4 +11,4 @@ obj-$(CONFIG_CAIF) += caif.o obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o obj-$(CONFIG_CAIF) += caif_socket.o -export-objs := caif.o +export-y := caif.o diff --git a/net/can/Makefile b/net/can/Makefile index 9cd3c4b3abda..2d3894b32742 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -3,10 +3,10 @@ # obj-$(CONFIG_CAN) += can.o -can-objs := af_can.o proc.o +can-y := af_can.o proc.o obj-$(CONFIG_CAN_RAW) += can-raw.o -can-raw-objs := raw.o +can-raw-y := raw.o obj-$(CONFIG_CAN_BCM) += can-bcm.o -can-bcm-objs := bcm.o +can-bcm-y := bcm.o diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 5f19415ec9c0..e87ef435e11b 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_CEPH_LIB) += libceph.o -libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ +libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ diff --git a/net/core/datagram.c b/net/core/datagram.c index cd1e039c8755..18ac112ea7ae 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, * interrupt level will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ unsigned long cpu_flags; diff --git a/net/core/dev.c b/net/core/dev.c index 0dd54a69dace..d28b3a023bb2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** - * dev_getbyhwaddr - find a device by its hardware address + * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. The returned device has not had its ref count increased + * is not found or a pointer to the device. The caller must hold RCU + * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *ha) { struct net_device *dev; - ASSERT_RTNL(); - - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; return NULL; } -EXPORT_SYMBOL(dev_getbyhwaddr); +EXPORT_SYMBOL(dev_getbyhwaddr_rcu); struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { @@ -1557,12 +1554,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + int rc; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, + txq); + if (rc) + return rc; + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -1794,16 +1798,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; int err; - if (type == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh; + while (type == htons(ETH_P_8021Q)) { + struct vlan_hdr *vh; - if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return ERR_PTR(-EINVAL); - veh = (struct vlan_ethhdr *)skb->data; - type = veh->h_vlan_encapsulated_proto; + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; } skb_reset_mac_header(skb); @@ -1817,8 +1823,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) dev->ethtool_ops->get_drvinfo(dev, &info); - WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " - "ip_summed=%d", + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", info.driver, dev ? dev->features : 0L, skb->sk ? skb->sk->sk_route_caps : 0L, skb->len, skb->data_len, skb->ip_summed); @@ -1967,6 +1972,23 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +{ + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else if (!skb->vlan_tci) + return dev->features; + + if (protocol != htons(ETH_P_8021Q)) + return dev->features & dev->vlan_features; + else + return 0; +} +EXPORT_SYMBOL(netif_get_vlan_features); + /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or @@ -1977,15 +1999,20 @@ static inline void skb_orphan_try(struct sk_buff *skb) static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { - int features = dev->features; + if (skb_is_nonlinear(skb)) { + int features = dev->features; - if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb)) - features &= dev->vlan_features; + if (vlan_tx_tag_present(skb)) + features &= dev->vlan_features; + + return (skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && + (!(features & NETIF_F_SG) || + illegal_highdma(dev, skb))); + } - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb)))); + return 0; } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1995,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2005,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_orphan_try(skb); if (vlan_tx_tag_present(skb) && @@ -2119,26 +2146,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) return queue_index; } +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) { + if (dev->real_num_tx_queues == 1) + queue_index = 0; + else if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); queue_index = dev_cap_txqueue(dev, queue_index); } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { - queue_index = 0; - if (dev->real_num_tx_queues > 1) + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int old_index = queue_index; + + queue_index = get_xps_queue(dev, skb); + if (queue_index < 0) queue_index = skb_tx_hash(dev, skb); - if (sk) { - struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); + if (queue_index != old_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); @@ -4967,10 +5038,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } if (features & NETIF_F_UFO) { - if (!(features & NETIF_F_GEN_CSUM)) { + /* maybe split UFO into V4 and V6? */ + if (!((features & NETIF_F_GEN_CSUM) || + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { if (name) printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_HW_CSUM feature.\n", + "since no checksum offload features.\n", name); features &= ~NETIF_F_UFO; } @@ -5014,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { -#ifdef CONFIG_RPS unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; @@ -5029,15 +5103,22 @@ static int netif_alloc_rx_queues(struct net_device *dev) } dev->_rx = rx; - /* - * Set a pointer to first element in the array which holds the - * reference count. - */ for (i = 0; i < count; i++) - rx[i].first = rx; -#endif + rx[i].dev = dev; return 0; } +#endif + +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, void *_unused) +{ + /* Initialize queue lock */ + spin_lock_init(&queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + queue->xmit_lock_owner = -1; + netdev_queue_numa_node_write(queue, -1); + queue->dev = dev; +} static int netif_alloc_netdev_queues(struct net_device *dev) { @@ -5053,25 +5134,11 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return -ENOMEM; } dev->_tx = tx; - return 0; -} -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - queue->dev = dev; - - /* Initialize queue lock */ - spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); - queue->xmit_lock_owner = -1; -} - -static void netdev_init_queues(struct net_device *dev) -{ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); spin_lock_init(&dev->tx_global_lock); + + return 0; } /** @@ -5110,16 +5177,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = netif_alloc_rx_queues(dev); - if (ret) - goto out; - - ret = netif_alloc_netdev_queues(dev); - if (ret) - goto out; - - netdev_init_queues(dev); - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5577,10 +5634,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + if (netif_alloc_netdev_queues(dev)) + goto free_pcpu; #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; dev->real_num_rx_queues = queue_count; + if (netif_alloc_rx_queues(dev)) + goto free_pcpu; #endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5597,6 +5658,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); + kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif + free_p: kfree(p); return NULL; @@ -5618,6 +5684,9 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif kfree(rcu_dereference_raw(dev->ingress_queue)); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 956a9f4971cb..d5bc28818883 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) return -EFAULT; if (edata.data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; return dev->ethtool_ops->set_ufo(dev, edata.data); } diff --git a/net/core/filter.c b/net/core/filter.c index ae21a0d3c4a2..e193e29d4671 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -37,9 +37,58 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/filter.h> +#include <linux/reciprocal_div.h> + +enum { + BPF_S_RET_K = 1, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, +}; /* No hurry in this branch */ -static void *__load_pointer(struct sk_buff *skb, int k) +static void *__load_pointer(const struct sk_buff *skb, int k) { u8 *ptr = NULL; @@ -53,7 +102,7 @@ static void *__load_pointer(struct sk_buff *skb, int k) return NULL; } -static inline void *load_pointer(struct sk_buff *skb, int k, +static inline void *load_pointer(const struct sk_buff *skb, int k, unsigned int size, void *buffer) { if (k >= 0) @@ -89,7 +138,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); + unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -103,50 +152,52 @@ EXPORT_SYMBOL(sk_filter); * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on * @filter: filter to apply - * @flen: length of filter * * Decode and apply filter instructions to the skb->data. - * Return length to keep, 0 for none. skb is the data we are - * filtering, filter is the array of filter instructions, and - * len is the number of filter blocks in the array. + * Return length to keep, 0 for none. @skb is the data we are + * filtering, @filter is the array of filter instructions. + * Because all jumps are guaranteed to be before last instruction, + * and last instruction guaranteed to be a RET, we dont need to check + * flen. (We used to pass to this function the length of filter) */ -unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(const struct sk_buff *skb, + const struct sock_filter *fentry) { void *ptr; u32 A = 0; /* Accumulator */ u32 X = 0; /* Index Register */ u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ - unsigned long memvalid = 0; u32 tmp; int k; - int pc; - BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); /* * Process array of filter instructions. */ - for (pc = 0; pc < flen; pc++) { - const struct sock_filter *fentry = &filter[pc]; - u32 f_k = fentry->k; + for (;; fentry++) { +#if defined(CONFIG_X86_32) +#define K (fentry->k) +#else + const u32 K = fentry->k; +#endif switch (fentry->code) { case BPF_S_ALU_ADD_X: A += X; continue; case BPF_S_ALU_ADD_K: - A += f_k; + A += K; continue; case BPF_S_ALU_SUB_X: A -= X; continue; case BPF_S_ALU_SUB_K: - A -= f_k; + A -= K; continue; case BPF_S_ALU_MUL_X: A *= X; continue; case BPF_S_ALU_MUL_K: - A *= f_k; + A *= K; continue; case BPF_S_ALU_DIV_X: if (X == 0) @@ -154,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int A /= X; continue; case BPF_S_ALU_DIV_K: - A /= f_k; + A = reciprocal_divide(A, K); continue; case BPF_S_ALU_AND_X: A &= X; continue; case BPF_S_ALU_AND_K: - A &= f_k; + A &= K; continue; case BPF_S_ALU_OR_X: A |= X; continue; case BPF_S_ALU_OR_K: - A |= f_k; + A |= K; continue; case BPF_S_ALU_LSH_X: A <<= X; continue; case BPF_S_ALU_LSH_K: - A <<= f_k; + A <<= K; continue; case BPF_S_ALU_RSH_X: A >>= X; continue; case BPF_S_ALU_RSH_K: - A >>= f_k; + A >>= K; continue; case BPF_S_ALU_NEG: A = -A; continue; case BPF_S_JMP_JA: - pc += f_k; + fentry += K; continue; case BPF_S_JMP_JGT_K: - pc += (A > f_k) ? fentry->jt : fentry->jf; + fentry += (A > K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_K: - pc += (A >= f_k) ? fentry->jt : fentry->jf; + fentry += (A >= K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_K: - pc += (A == f_k) ? fentry->jt : fentry->jf; + fentry += (A == K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_K: - pc += (A & f_k) ? fentry->jt : fentry->jf; + fentry += (A & K) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGT_X: - pc += (A > X) ? fentry->jt : fentry->jf; + fentry += (A > X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JGE_X: - pc += (A >= X) ? fentry->jt : fentry->jf; + fentry += (A >= X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JEQ_X: - pc += (A == X) ? fentry->jt : fentry->jf; + fentry += (A == X) ? fentry->jt : fentry->jf; continue; case BPF_S_JMP_JSET_X: - pc += (A & X) ? fentry->jt : fentry->jf; + fentry += (A & X) ? fentry->jt : fentry->jf; continue; case BPF_S_LD_W_ABS: - k = f_k; + k = K; load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { @@ -220,7 +271,7 @@ load_w: } break; case BPF_S_LD_H_ABS: - k = f_k; + k = K; load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { @@ -229,7 +280,7 @@ load_h: } break; case BPF_S_LD_B_ABS: - k = f_k; + k = K; load_b: ptr = load_pointer(skb, k, 1, &tmp); if (ptr != NULL) { @@ -244,34 +295,32 @@ load_b: X = skb->len; continue; case BPF_S_LD_W_IND: - k = X + f_k; + k = X + K; goto load_w; case BPF_S_LD_H_IND: - k = X + f_k; + k = X + K; goto load_h; case BPF_S_LD_B_IND: - k = X + f_k; + k = X + K; goto load_b; case BPF_S_LDX_B_MSH: - ptr = load_pointer(skb, f_k, 1, &tmp); + ptr = load_pointer(skb, K, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; case BPF_S_LD_IMM: - A = f_k; + A = K; continue; case BPF_S_LDX_IMM: - X = f_k; + X = K; continue; case BPF_S_LD_MEM: - A = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + A = mem[K]; continue; case BPF_S_LDX_MEM: - X = (memvalid & (1UL << f_k)) ? - mem[f_k] : 0; + X = mem[K]; continue; case BPF_S_MISC_TAX: X = A; @@ -280,16 +329,14 @@ load_b: A = X; continue; case BPF_S_RET_K: - return f_k; + return K; case BPF_S_RET_A: return A; case BPF_S_ST: - memvalid |= 1UL << f_k; - mem[f_k] = A; + mem[K] = A; continue; case BPF_S_STX: - memvalid |= 1UL << f_k; - mem[f_k] = X; + mem[K] = X; continue; default: WARN_ON(1); @@ -323,6 +370,12 @@ load_b: return 0; A = skb->dev->type; continue; + case SKF_AD_RXHASH: + A = skb->rxhash; + continue; + case SKF_AD_CPU: + A = raw_smp_processor_id(); + continue; case SKF_AD_NLATTR: { struct nlattr *nla; @@ -367,6 +420,66 @@ load_b: } EXPORT_SYMBOL(sk_run_filter); +/* + * Security : + * A BPF program is able to use 16 cells of memory to store intermediate + * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) + * As we dont want to clear mem[] array for each packet going through + * sk_run_filter(), we check that filter loaded by user never try to read + * a cell if not previously written, and we check all branches to be sure + * a malicious user doesnt try to abuse us. + */ +static int check_load_and_stores(struct sock_filter *filter, int flen) +{ + u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ + int pc, ret = 0; + + BUILD_BUG_ON(BPF_MEMWORDS > 16); + masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + if (!masks) + return -ENOMEM; + memset(masks, 0xff, flen * sizeof(*masks)); + + for (pc = 0; pc < flen; pc++) { + memvalid &= masks[pc]; + + switch (filter[pc].code) { + case BPF_S_ST: + case BPF_S_STX: + memvalid |= (1 << filter[pc].k); + break; + case BPF_S_LD_MEM: + case BPF_S_LDX_MEM: + if (!(memvalid & (1 << filter[pc].k))) { + ret = -EINVAL; + goto error; + } + break; + case BPF_S_JMP_JA: + /* a jump must set masks on target */ + masks[pc + 1 + filter[pc].k] &= memvalid; + memvalid = ~0; + break; + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: + /* a jump must set masks on targets */ + masks[pc + 1 + filter[pc].jt] &= memvalid; + masks[pc + 1 + filter[pc].jf] &= memvalid; + memvalid = ~0; + break; + } + } +error: + kfree(masks); + return ret; +} + /** * sk_chk_filter - verify socket filter code * @filter: filter to verify @@ -383,7 +496,57 @@ EXPORT_SYMBOL(sk_run_filter); */ int sk_chk_filter(struct sock_filter *filter, int flen) { - struct sock_filter *ftest; + /* + * Valid instructions are initialized to non-0. + * Invalid instructions are initialized to 0. + */ + static const u8 codes[] = { + [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, + [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, + [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, + [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, + [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, + [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, + [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, + [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, + [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, + [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, + [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, + [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, + [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, + [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, + [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, + [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, + [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, + [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, + [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, + [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, + [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, + [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, + [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, + [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, + [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, + [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, + [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, + [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, + [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, + [BPF_RET|BPF_K] = BPF_S_RET_K, + [BPF_RET|BPF_A] = BPF_S_RET_A, + [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, + [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, + [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, + [BPF_ST] = BPF_S_ST, + [BPF_STX] = BPF_S_STX, + [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, + [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, + [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, + [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, + [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, + [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, + [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, + [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, + [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, + }; int pc; if (flen == 0 || flen > BPF_MAXINSNS) @@ -391,136 +554,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen) /* check the filter code now */ for (pc = 0; pc < flen; pc++) { - ftest = &filter[pc]; - - /* Only allow valid instructions */ - switch (ftest->code) { - case BPF_ALU|BPF_ADD|BPF_K: - ftest->code = BPF_S_ALU_ADD_K; - break; - case BPF_ALU|BPF_ADD|BPF_X: - ftest->code = BPF_S_ALU_ADD_X; - break; - case BPF_ALU|BPF_SUB|BPF_K: - ftest->code = BPF_S_ALU_SUB_K; - break; - case BPF_ALU|BPF_SUB|BPF_X: - ftest->code = BPF_S_ALU_SUB_X; - break; - case BPF_ALU|BPF_MUL|BPF_K: - ftest->code = BPF_S_ALU_MUL_K; - break; - case BPF_ALU|BPF_MUL|BPF_X: - ftest->code = BPF_S_ALU_MUL_X; - break; - case BPF_ALU|BPF_DIV|BPF_X: - ftest->code = BPF_S_ALU_DIV_X; - break; - case BPF_ALU|BPF_AND|BPF_K: - ftest->code = BPF_S_ALU_AND_K; - break; - case BPF_ALU|BPF_AND|BPF_X: - ftest->code = BPF_S_ALU_AND_X; - break; - case BPF_ALU|BPF_OR|BPF_K: - ftest->code = BPF_S_ALU_OR_K; - break; - case BPF_ALU|BPF_OR|BPF_X: - ftest->code = BPF_S_ALU_OR_X; - break; - case BPF_ALU|BPF_LSH|BPF_K: - ftest->code = BPF_S_ALU_LSH_K; - break; - case BPF_ALU|BPF_LSH|BPF_X: - ftest->code = BPF_S_ALU_LSH_X; - break; - case BPF_ALU|BPF_RSH|BPF_K: - ftest->code = BPF_S_ALU_RSH_K; - break; - case BPF_ALU|BPF_RSH|BPF_X: - ftest->code = BPF_S_ALU_RSH_X; - break; - case BPF_ALU|BPF_NEG: - ftest->code = BPF_S_ALU_NEG; - break; - case BPF_LD|BPF_W|BPF_ABS: - ftest->code = BPF_S_LD_W_ABS; - break; - case BPF_LD|BPF_H|BPF_ABS: - ftest->code = BPF_S_LD_H_ABS; - break; - case BPF_LD|BPF_B|BPF_ABS: - ftest->code = BPF_S_LD_B_ABS; - break; - case BPF_LD|BPF_W|BPF_LEN: - ftest->code = BPF_S_LD_W_LEN; - break; - case BPF_LD|BPF_W|BPF_IND: - ftest->code = BPF_S_LD_W_IND; - break; - case BPF_LD|BPF_H|BPF_IND: - ftest->code = BPF_S_LD_H_IND; - break; - case BPF_LD|BPF_B|BPF_IND: - ftest->code = BPF_S_LD_B_IND; - break; - case BPF_LD|BPF_IMM: - ftest->code = BPF_S_LD_IMM; - break; - case BPF_LDX|BPF_W|BPF_LEN: - ftest->code = BPF_S_LDX_W_LEN; - break; - case BPF_LDX|BPF_B|BPF_MSH: - ftest->code = BPF_S_LDX_B_MSH; - break; - case BPF_LDX|BPF_IMM: - ftest->code = BPF_S_LDX_IMM; - break; - case BPF_MISC|BPF_TAX: - ftest->code = BPF_S_MISC_TAX; - break; - case BPF_MISC|BPF_TXA: - ftest->code = BPF_S_MISC_TXA; - break; - case BPF_RET|BPF_K: - ftest->code = BPF_S_RET_K; - break; - case BPF_RET|BPF_A: - ftest->code = BPF_S_RET_A; - break; + struct sock_filter *ftest = &filter[pc]; + u16 code = ftest->code; + if (code >= ARRAY_SIZE(codes)) + return -EINVAL; + code = codes[code]; + if (!code) + return -EINVAL; /* Some instructions need special checks */ - + switch (code) { + case BPF_S_ALU_DIV_K: /* check for division by zero */ - case BPF_ALU|BPF_DIV|BPF_K: if (ftest->k == 0) return -EINVAL; - ftest->code = BPF_S_ALU_DIV_K; - break; - - /* check for invalid memory addresses */ - case BPF_LD|BPF_MEM: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_LD_MEM; - break; - case BPF_LDX|BPF_MEM: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_LDX_MEM; - break; - case BPF_ST: - if (ftest->k >= BPF_MEMWORDS) - return -EINVAL; - ftest->code = BPF_S_ST; + ftest->k = reciprocal_value(ftest->k); break; - case BPF_STX: + case BPF_S_LD_MEM: + case BPF_S_LDX_MEM: + case BPF_S_ST: + case BPF_S_STX: + /* check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; - ftest->code = BPF_S_STX; break; - - case BPF_JMP|BPF_JA: + case BPF_S_JMP_JA: /* * Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, @@ -528,40 +586,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ if (ftest->k >= (unsigned)(flen-pc-1)) return -EINVAL; - ftest->code = BPF_S_JMP_JA; - break; - - case BPF_JMP|BPF_JEQ|BPF_K: - ftest->code = BPF_S_JMP_JEQ_K; - break; - case BPF_JMP|BPF_JEQ|BPF_X: - ftest->code = BPF_S_JMP_JEQ_X; - break; - case BPF_JMP|BPF_JGE|BPF_K: - ftest->code = BPF_S_JMP_JGE_K; - break; - case BPF_JMP|BPF_JGE|BPF_X: - ftest->code = BPF_S_JMP_JGE_X; - break; - case BPF_JMP|BPF_JGT|BPF_K: - ftest->code = BPF_S_JMP_JGT_K; - break; - case BPF_JMP|BPF_JGT|BPF_X: - ftest->code = BPF_S_JMP_JGT_X; - break; - case BPF_JMP|BPF_JSET|BPF_K: - ftest->code = BPF_S_JMP_JSET_K; break; - case BPF_JMP|BPF_JSET|BPF_X: - ftest->code = BPF_S_JMP_JSET_X; - break; - - default: - return -EINVAL; - } - - /* for conditionals both must be safe */ - switch (ftest->code) { case BPF_S_JMP_JEQ_K: case BPF_S_JMP_JEQ_X: case BPF_S_JMP_JGE_K: @@ -570,21 +595,22 @@ int sk_chk_filter(struct sock_filter *filter, int flen) case BPF_S_JMP_JGT_X: case BPF_S_JMP_JSET_X: case BPF_S_JMP_JSET_K: + /* for conditionals both must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; + break; } + ftest->code = code; } /* last instruction must be a RET code */ switch (filter[flen - 1].code) { case BPF_S_RET_K: case BPF_S_RET_A: - return 0; - break; - default: - return -EINVAL; - } + return check_load_and_stores(filter, flen); + } + return -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7f902cad10f8..85e8b5326dd6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = { static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); - struct netdev_rx_queue *first = queue->first; struct rps_map *map; struct rps_dev_flow_table *flow_table; @@ -723,10 +722,8 @@ static void rx_queue_release(struct kobject *kobj) call_rcu(&flow_table->rcu, rps_dev_flow_table_release); } - if (atomic_dec_and_test(&first->count)) - kfree(first); - else - memset(kobj, 0, sizeof(*kobj)); + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); } static struct kobj_type rx_queue_ktype = { @@ -738,7 +735,6 @@ static struct kobj_type rx_queue_ktype = { static int rx_queue_add_kobject(struct net_device *net, int index) { struct netdev_rx_queue *queue = net->_rx + index; - struct netdev_rx_queue *first = queue->first; struct kobject *kobj = &queue->kobj; int error = 0; @@ -751,14 +747,16 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - atomic_inc(&first->count); + dev_hold(queue->dev); return error; } +#endif /* CONFIG_RPS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { +#ifdef CONFIG_RPS int i; int error = 0; @@ -774,23 +772,422 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) kobject_put(&net->_rx[i].kobj); return error; +#else + return 0; +#endif +} + +#ifdef CONFIG_XPS +/* + * netdev_queue sysfs structures and functions. + */ +struct netdev_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, const char *buf, size_t len); +}; +#define to_netdev_queue_attr(_attr) container_of(_attr, \ + struct netdev_queue_attribute, attr) + +#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) + +static ssize_t netdev_queue_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(queue, attribute, buf); } -static int rx_queue_register_kobjects(struct net_device *net) +static ssize_t netdev_queue_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) { + struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + struct netdev_queue *queue = to_netdev_queue(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(queue, attribute, buf, count); +} + +static const struct sysfs_ops netdev_queue_sysfs_ops = { + .show = netdev_queue_attr_show, + .store = netdev_queue_attr_store, +}; + +static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) + if (queue == &dev->_tx[i]) + break; + + BUG_ON(i >= dev->num_tx_queues); + + return i; +} + + +static ssize_t show_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, char *buf) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + cpumask_var_t mask; + unsigned long index; + size_t len = 0; + int i; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + for_each_possible_cpu(i) { + struct xps_map *map = + rcu_dereference(dev_maps->cpu_map[i]); + if (map) { + int j; + for (j = 0; j < map->len; j++) { + if (map->queues[j] == index) { + cpumask_set_cpu(i, mask); + break; + } + } + } + } + } + rcu_read_unlock(); + + len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); + if (PAGE_SIZE - len < 3) { + free_cpumask_var(mask); + return -EINVAL; + } + + free_cpumask_var(mask); + len += sprintf(buf + len, "\n"); + return len; +} + +static void xps_map_release(struct rcu_head *rcu) +{ + struct xps_map *map = container_of(rcu, struct xps_map, rcu); + + kfree(map); +} + +static void xps_dev_maps_release(struct rcu_head *rcu) +{ + struct xps_dev_maps *dev_maps = + container_of(rcu, struct xps_dev_maps, rcu); + + kfree(dev_maps); +} + +static DEFINE_MUTEX(xps_map_mutex); +#define xmap_dereference(P) \ + rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) + +static ssize_t store_xps_map(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct net_device *dev = queue->dev; + cpumask_var_t mask; + int err, i, cpu, pos, map_len, alloc_len, need_set; + unsigned long index; + struct xps_map *map, *new_map; + struct xps_dev_maps *dev_maps, *new_dev_maps; + int nonempty = 0; + int numa_node = -2; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + index = get_netdev_queue_index(queue); + + err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); + if (err) { + free_cpumask_var(mask); + return err; + } + + new_dev_maps = kzalloc(max_t(unsigned, + XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); + if (!new_dev_maps) { + free_cpumask_var(mask); + return -ENOMEM; + } + + mutex_lock(&xps_map_mutex); + + dev_maps = xmap_dereference(dev->xps_maps); + + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + new_map = map; + if (map) { + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + map_len = map->len; + alloc_len = map->alloc_len; + } else + pos = map_len = alloc_len = 0; + + need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); +#ifdef CONFIG_NUMA + if (need_set) { + if (numa_node == -2) + numa_node = cpu_to_node(cpu); + else if (numa_node != cpu_to_node(cpu)) + numa_node = -1; + } +#endif + if (need_set && pos >= map_len) { + /* Need to add queue to this CPU's map */ + if (map_len >= alloc_len) { + alloc_len = alloc_len ? + 2 * alloc_len : XPS_MIN_MAP_ALLOC; + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!new_map) + goto error; + new_map->alloc_len = alloc_len; + for (i = 0; i < map_len; i++) + new_map->queues[i] = map->queues[i]; + new_map->len = map_len; + } + new_map->queues[new_map->len++] = index; + } else if (!need_set && pos < map_len) { + /* Need to remove queue from this CPU's map */ + if (map_len > 1) + new_map->queues[pos] = + new_map->queues[--new_map->len]; + else + new_map = NULL; + } + RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); + } + + /* Cleanup old maps */ + for_each_possible_cpu(cpu) { + map = dev_maps ? + xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; + if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) + call_rcu(&map->rcu, xps_map_release); + if (new_dev_maps->cpu_map[cpu]) + nonempty = 1; + } + + if (nonempty) + rcu_assign_pointer(dev->xps_maps, new_dev_maps); + else { + kfree(new_dev_maps); + rcu_assign_pointer(dev->xps_maps, NULL); + } + + if (dev_maps) + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + + netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1); + + mutex_unlock(&xps_map_mutex); + + free_cpumask_var(mask); + return len; + +error: + mutex_unlock(&xps_map_mutex); + + if (new_dev_maps) + for_each_possible_cpu(i) + kfree(rcu_dereference_protected( + new_dev_maps->cpu_map[i], + 1)); + kfree(new_dev_maps); + free_cpumask_var(mask); + return -ENOMEM; +} + +static struct netdev_queue_attribute xps_cpus_attribute = + __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); + +static struct attribute *netdev_queue_default_attrs[] = { + &xps_cpus_attribute.attr, + NULL +}; + +static void netdev_queue_release(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (dev_maps) { + for_each_possible_cpu(i) { + map = xmap_dereference(dev_maps->cpu_map[i]); + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + call_rcu(&map->rcu, xps_map_release); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + call_rcu(&dev_maps->rcu, xps_dev_maps_release); + } + } + + mutex_unlock(&xps_map_mutex); + + memset(kobj, 0, sizeof(*kobj)); + dev_put(queue->dev); +} + +static struct kobj_type netdev_queue_ktype = { + .sysfs_ops = &netdev_queue_sysfs_ops, + .release = netdev_queue_release, + .default_attrs = netdev_queue_default_attrs, +}; + +static int netdev_queue_add_kobject(struct net_device *net, int index) +{ + struct netdev_queue *queue = net->_tx + index; + struct kobject *kobj = &queue->kobj; + int error = 0; + + kobj->kset = net->queues_kset; + error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, + "tx-%u", index); + if (error) { + kobject_put(kobj); + return error; + } + + kobject_uevent(kobj, KOBJ_ADD); + dev_hold(queue->dev); + + return error; +} +#endif /* CONFIG_XPS */ + +int +netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) +{ +#ifdef CONFIG_XPS + int i; + int error = 0; + + for (i = old_num; i < new_num; i++) { + error = netdev_queue_add_kobject(net, i); + if (error) { + new_num = old_num; + break; + } + } + + while (--i >= new_num) + kobject_put(&net->_tx[i].kobj); + + return error; +#else + return 0; +#endif +} + +static int register_queue_kobjects(struct net_device *net) +{ + int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; + +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; - return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); +#endif + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + error = net_rx_queue_update_kobjects(net, 0, real_rx); + if (error) + goto error; + rxq = real_rx; + + error = netdev_queue_update_kobjects(net, 0, real_tx); + if (error) + goto error; + txq = real_tx; + + return 0; + +error: + netdev_queue_update_kobjects(net, txq, 0); + net_rx_queue_update_kobjects(net, rxq, 0); + return error; } -static void rx_queue_remove_kobjects(struct net_device *net) +static void remove_queue_kobjects(struct net_device *net) { - net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); + int real_rx = 0, real_tx = 0; + +#ifdef CONFIG_RPS + real_rx = net->real_num_rx_queues; +#endif + real_tx = net->real_num_tx_queues; + + net_rx_queue_update_kobjects(net, real_rx, 0); + netdev_queue_update_kobjects(net, real_tx, 0); +#if defined(CONFIG_RPS) || defined(CONFIG_XPS) kset_unregister(net->queues_kset); +#endif } -#endif /* CONFIG_RPS */ static const void *net_current_ns(void) { @@ -889,9 +1286,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); -#ifdef CONFIG_RPS - rx_queue_remove_kobjects(net); -#endif + remove_queue_kobjects(net); device_del(dev); } @@ -930,13 +1325,11 @@ int netdev_register_kobject(struct net_device *net) if (error) return error; -#ifdef CONFIG_RPS - error = rx_queue_register_kobjects(net); + error = register_queue_kobjects(net); if (error) { device_del(dev); return error; } -#endif return error; } diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 778e1571548d..bd7751ec1c4d 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,8 +4,8 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -#ifdef CONFIG_RPS int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); -#endif +int netdev_queue_update_kobjects(struct net_device *net, + int old_num, int new_num); #endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af0..ee38acb6d463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq) || + if (netif_tx_queue_frozen_or_stopped(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 33bc3823ac6f..2953b2abc971 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -378,6 +378,7 @@ struct pktgen_dev { u16 queue_map_min; u16 queue_map_max; + __u32 skb_priority; /* skb priority field */ int node; /* Memory node */ #ifdef CONFIG_XFRM @@ -394,6 +395,8 @@ struct pktgen_hdr { __be32 tv_usec; }; +static bool pktgen_exiting __read_mostly; + struct pktgen_thread { spinlock_t if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ @@ -547,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->queue_map_min, pkt_dev->queue_map_max); + if (pkt_dev->skb_priority) + seq_printf(seq, " skb_priority: %u\n", + pkt_dev->skb_priority); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -1711,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + if (!strcmp(name, "skb_priority")) { + len = num_arg(&user_buffer[i], 9, &value); + if (len < 0) + return len; + + i += len; + pkt_dev->skb_priority = value; + sprintf(pg_result, "OK: skb_priority=%i", + pkt_dev->skb_priority); + return count; + } + sprintf(pkt_dev->result, "No such parameter \"%s\"", name); return -EINVAL; } @@ -2671,6 +2690,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); + skb->priority = pkt_dev->skb_priority; + iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -3016,6 +3037,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); + skb->priority = pkt_dev->skb_priority; iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3431,11 +3453,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t) remove_proc_entry(t->tsk->comm, pg_proc_dir); - mutex_lock(&pktgen_thread_lock); - - list_del(&t->th_list); - - mutex_unlock(&pktgen_thread_lock); } static void pktgen_resched(struct pktgen_dev *pkt_dev) @@ -3510,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { + if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; @@ -3582,6 +3599,8 @@ static int pktgen_thread_worker(void *arg) pkt_dev = next_to_run(t); if (unlikely(!pkt_dev && t->control == 0)) { + if (pktgen_exiting) + break; wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); @@ -3634,6 +3653,13 @@ static int pktgen_thread_worker(void *arg) pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); + /* Wait for kthread_stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; } @@ -3908,6 +3934,7 @@ static void __exit pg_cleanup(void) struct list_head *q, *n; /* Stop all interfaces & threads */ + pktgen_exiting = true; list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index fceeb37d7161..182236b2510a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,6 +33,7 @@ * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; +EXPORT_SYMBOL(sysctl_max_syn_backlog); int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 841c287ef40a..750db57f3bb3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev) return size; } +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. + */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } + + return size; +} + static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, *af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -985,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EINVAL; } + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem, err; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + return -EAFNOSUPPORT; + + if (!af_ops->set_link_af) + return -EOPNOTSUPP; + + if (af_ops->validate_link_af) { + err = af_ops->validate_link_af(dev, + tb[IFLA_AF_SPEC]); + if (err < 0) + return err; + } + } + } + return 0; } @@ -1225,6 +1369,24 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + BUG(); + + err = af_ops->set_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: diff --git a/net/core/scm.c b/net/core/scm.c index 413cab89017d..bbe454450801 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) return -ENOMEM; *fplp = fpl; fpl->count = 0; + fpl->max = SCM_MAX_FD; } fpp = &fpl->fp[fpl->count]; - if (fpl->count + num > SCM_MAX_FD) + if (fpl->count + num > fpl->max) return -EINVAL; /* @@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) if (!fpl) return NULL; - new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); + new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), + GFP_KERNEL); if (new_fpl) { - for (i=fpl->count-1; i>=0; i--) + for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); - memcpy(new_fpl, fpl, sizeof(*fpl)); + new_fpl->max = new_fpl->count; } return new_fpl; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 104f8444754a..8814a9a52f47 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -778,6 +778,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); + /* Check if we can avoid taking references on fragments if we own + * the last reference on skb->head. (see skb_release_data()) + */ + if (!skb->cloned) + fastpath = true; + else { + int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; + } + + if (fastpath && + size + sizeof(struct skb_shared_info) <= ksize(skb->head)) { + memmove(skb->head + size, skb_shinfo(skb), + offsetof(struct skb_shared_info, + frags[skb_shinfo(skb)->nr_frags])); + memmove(skb->head + nhead, skb->head, + skb_tail_pointer(skb) - skb->head); + off = nhead; + goto adjust_others; + } + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); if (!data) goto nodata; @@ -791,17 +813,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); - /* Check if we can avoid taking references on fragments if we own - * the last reference on skb->head. (see skb_release_data()) - */ - if (!skb->cloned) - fastpath = true; - else { - int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; - } - if (fastpath) { kfree(skb->head); } else { @@ -816,6 +827,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, off = (data + nhead) - skb->head; skb->head = data; +adjust_others: skb->data += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET skb->end = size; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 0ae6c22da85b..b124d28ff1c8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -26,12 +26,12 @@ static struct sock_filter ptp_filter[] = { PTP_FILTER }; -static unsigned int classify(struct sk_buff *skb) +static unsigned int classify(const struct sk_buff *skb) { if (likely(skb->dev && skb->dev->phydev && skb->dev->phydev->drv)) - return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + return sk_run_filter(skb, ptp_filter); else return PTP_CLASS_NONE; } diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8dea..5c8362b037ed 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o -dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o - +dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ + qpolicy.o # # CCID algorithms to be used by dccp.ko # diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 92a6fcb40d7d..25b7a8d1ad58 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -1,444 +1,375 @@ /* * net/dccp/ackvec.c * - * An implementation of the DCCP protocol + * An implementation of Ack Vectors for the DCCP protocol + * Copyright (c) 2007 University of Aberdeen, Scotland, UK * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License; */ - -#include "ackvec.h" #include "dccp.h" - -#include <linux/init.h> -#include <linux/errno.h> #include <linux/kernel.h> -#include <linux/skbuff.h> #include <linux/slab.h> -#include <net/sock.h> - static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; -static struct dccp_ackvec_record *dccp_ackvec_record_new(void) +struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) { - struct dccp_ackvec_record *avr = - kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); + struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority); - if (avr != NULL) - INIT_LIST_HEAD(&avr->avr_node); - - return avr; + if (av != NULL) { + av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1; + INIT_LIST_HEAD(&av->av_records); + } + return av; } -static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr) +static void dccp_ackvec_purge_records(struct dccp_ackvec *av) { - if (unlikely(avr == NULL)) - return; - /* Check if deleting a linked record */ - WARN_ON(!list_empty(&avr->avr_node)); - kmem_cache_free(dccp_ackvec_record_slab, avr); + struct dccp_ackvec_record *cur, *next; + + list_for_each_entry_safe(cur, next, &av->av_records, avr_node) + kmem_cache_free(dccp_ackvec_record_slab, cur); + INIT_LIST_HEAD(&av->av_records); } -static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, - struct dccp_ackvec_record *avr) +void dccp_ackvec_free(struct dccp_ackvec *av) { - /* - * AVRs are sorted by seqno. Since we are sending them in order, we - * just add the AVR at the head of the list. - * -sorbo. - */ - if (!list_empty(&av->av_records)) { - const struct dccp_ackvec_record *head = - list_entry(av->av_records.next, - struct dccp_ackvec_record, - avr_node); - BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno)); + if (likely(av != NULL)) { + dccp_ackvec_purge_records(av); + kmem_cache_free(dccp_ackvec_slab, av); } - - list_add(&avr->avr_node, &av->av_records); } -int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +/** + * dccp_ackvec_update_records - Record information about sent Ack Vectors + * @av: Ack Vector records to update + * @seqno: Sequence number of the packet carrying the Ack Vector just sent + * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector + */ +int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - /* Figure out how many options do we need to represent the ackvec */ - const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); - u16 len = av->av_vec_len + 2 * nr_opts, i; - u32 elapsed_time; - const unsigned char *tail, *from; - unsigned char *to; struct dccp_ackvec_record *avr; - suseconds_t delta; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - delta = ktime_us_delta(ktime_get_real(), av->av_time); - elapsed_time = delta / 10; - if (elapsed_time != 0 && - dccp_insert_option_elapsed_time(skb, elapsed_time)) - return -1; - - avr = dccp_ackvec_record_new(); + avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); if (avr == NULL) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - len = av->av_vec_len; - from = av->av_buf + av->av_buf_head; - tail = av->av_buf + DCCP_MAX_ACKVEC_LEN; - - for (i = 0; i < nr_opts; ++i) { - int copylen = len; - - if (len > DCCP_SINGLE_OPT_MAXLEN) - copylen = DCCP_SINGLE_OPT_MAXLEN; - - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = copylen + 2; - - /* Check if buf_head wraps */ - if (from + copylen > tail) { - const u16 tailsize = tail - from; - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - copylen -= tailsize; - from = av->av_buf; - } - - memcpy(to, from, copylen); - from += copylen; - to += copylen; - len -= copylen; - } + return -ENOBUFS; + avr->avr_ack_seqno = seqno; + avr->avr_ack_ptr = av->av_buf_head; + avr->avr_ack_ackno = av->av_buf_ackno; + avr->avr_ack_nonce = nonce_sum; + avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head); /* - * From RFC 4340, A.2: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. + * When the buffer overflows, we keep no more than one record. This is + * the simplest way of disambiguating sender-Acks dating from before the + * overflow from sender-Acks which refer to after the overflow; a simple + * solution is preferable here since we are handling an exception. */ - avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - avr->avr_ack_ptr = av->av_buf_head; - avr->avr_ack_ackno = av->av_buf_ackno; - avr->avr_ack_nonce = av->av_buf_nonce; - avr->avr_sent_len = av->av_vec_len; - - dccp_ackvec_insert_avr(av, avr); + if (av->av_overflow) + dccp_ackvec_purge_records(av); + /* + * Since GSS is incremented for each packet, the list is automatically + * arranged in descending order of @ack_seqno. + */ + list_add(&avr->avr_node, &av->av_records); - dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - dccp_role(sk), avr->avr_sent_len, + dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n", (unsigned long long)avr->avr_ack_seqno, - (unsigned long long)avr->avr_ack_ackno); + (unsigned long long)avr->avr_ack_ackno, + avr->avr_ack_runlen); return 0; } -struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) +static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list, + const u64 ackno) { - struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); - - if (av != NULL) { - av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1; - av->av_buf_ackno = UINT48_MAX + 1; - av->av_buf_nonce = 0; - av->av_time = ktime_set(0, 0); - av->av_vec_len = 0; - INIT_LIST_HEAD(&av->av_records); + struct dccp_ackvec_record *avr; + /* + * Exploit that records are inserted in descending order of sequence + * number, start with the oldest record first. If @ackno is `before' + * the earliest ack_ackno, the packet is too old to be considered. + */ + list_for_each_entry_reverse(avr, av_list, avr_node) { + if (avr->avr_ack_seqno == ackno) + return avr; + if (before48(ackno, avr->avr_ack_seqno)) + break; } - - return av; + return NULL; } -void dccp_ackvec_free(struct dccp_ackvec *av) +/* + * Buffer index and length computation using modulo-buffersize arithmetic. + * Note that, as pointers move from right to left, head is `before' tail. + */ +static inline u16 __ackvec_idx_add(const u16 a, const u16 b) { - if (unlikely(av == NULL)) - return; - - if (!list_empty(&av->av_records)) { - struct dccp_ackvec_record *avr, *next; - - list_for_each_entry_safe(avr, next, &av->av_records, avr_node) { - list_del_init(&avr->avr_node); - dccp_ackvec_record_delete(avr); - } - } - - kmem_cache_free(dccp_ackvec_slab, av); + return (a + b) % DCCPAV_MAX_ACKVEC_LEN; } -static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, - const u32 index) +static inline u16 __ackvec_idx_sub(const u16 a, const u16 b) { - return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK; + return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b); } -static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, - const u32 index) +u16 dccp_ackvec_buflen(const struct dccp_ackvec *av) { - return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK; + if (unlikely(av->av_overflow)) + return DCCPAV_MAX_ACKVEC_LEN; + return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); } -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. +/** + * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1 + * @av: non-empty buffer to update + * @distance: negative or zero distance of @seqno from buf_ackno downward + * @seqno: the (old) sequence number whose record is to be updated + * @state: state in which packet carrying @seqno was received */ -static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, - const unsigned int packets, - const unsigned char state) +static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance, + u64 seqno, enum dccp_ackvec_states state) { - long gap; - long new_head; + u16 ptr = av->av_buf_head; - if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) - return -ENOBUFS; + BUG_ON(distance > 0); + if (unlikely(dccp_ackvec_is_empty(av))) + return; - gap = packets - 1; - new_head = av->av_buf_head - packets; + do { + u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr); - if (new_head < 0) { - if (gap > 0) { - memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; + if (distance + runlen >= 0) { + /* + * Only update the state if packet has not been received + * yet. This is OK as per the second table in RFC 4340, + * 11.4.1; i.e. here we are using the following table: + * RECEIVED + * 0 1 3 + * S +---+---+---+ + * T 0 | 0 | 0 | 0 | + * O +---+---+---+ + * R 1 | 1 | 1 | 1 | + * E +---+---+---+ + * D 3 | 0 | 1 | 3 | + * +---+---+---+ + * The "Not Received" state was set by reserve_seats(). + */ + if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED) + av->av_buf[ptr] = state; + else + dccp_pr_debug("Not changing %llu state to %u\n", + (unsigned long long)seqno, state); + break; } - new_head += DCCP_MAX_ACKVEC_LEN; - } - av->av_buf_head = new_head; + distance += runlen + 1; + ptr = __ackvec_idx_add(ptr, 1); - if (gap > 0) - memset(av->av_buf + av->av_buf_head + 1, - DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); + } while (ptr != av->av_buf_tail); +} - av->av_buf[av->av_buf_head] = state; - av->av_vec_len += packets; - return 0; +/* Mark @num entries after buf_head as "Not yet received". */ +static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num) +{ + u16 start = __ackvec_idx_add(av->av_buf_head, 1), + len = DCCPAV_MAX_ACKVEC_LEN - start; + + /* check for buffer wrap-around */ + if (num > len) { + memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len); + start = 0; + num -= len; + } + if (num) + memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num); } -/* - * Implements the RFC 4340, Appendix A +/** + * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer + * @av: container of buffer to update (can be empty or non-empty) + * @num_packets: number of packets to register (must be >= 1) + * @seqno: sequence number of the first packet in @num_packets + * @state: state in which packet carrying @seqno was received */ -int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) +static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, + u64 seqno, enum dccp_ackvec_states state) { - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in av_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A.1.1 (`New Packets'): - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ + u32 num_cells = num_packets; - /* See if this is the first ackno being inserted */ - if (av->av_vec_len == 0) { - av->av_buf[av->av_buf_head] = state; - av->av_vec_len = 1; - } else if (after48(ackno, av->av_buf_ackno)) { - const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno); + if (num_packets > DCCPAV_BURST_THRESH) { + u32 lost_packets = num_packets - 1; + DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets); /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. + * We received 1 packet and have a loss of size "num_packets-1" + * which we squeeze into num_cells-1 rather than reserving an + * entire byte for each lost packet. + * The reason is that the vector grows in O(burst_length); when + * it grows too large there will no room left for the payload. + * This is a trade-off: if a few packets out of the burst show + * up later, their state will not be changed; it is simply too + * costly to reshuffle/reallocate/copy the buffer each time. + * Should such problems persist, we will need to switch to a + * different underlying data structure. */ - if (delta == 1 && - dccp_ackvec_state(av, av->av_buf_head) == state && - dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK) - av->av_buf[av->av_buf_head]++; - else if (dccp_ackvec_set_buf_head_state(av, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S <= buf_ackno - * arrives, the HC-Receiver will scan the table for - * the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno); - u32 index = av->av_buf_head; + for (num_packets = num_cells = 1; lost_packets; ++num_cells) { + u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN); - while (1) { - const u8 len = dccp_ackvec_len(av, index); - const u8 av_state = dccp_ackvec_state(av, index); - /* - * valid packets not yet in av_buf have a reserved - * entry, with a len equal to 0. - */ - if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long)ackno); - av->av_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == DCCP_MAX_ACKVEC_LEN) - index = 0; + av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1); + av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len; + + lost_packets -= len; } } - av->av_buf_ackno = ackno; - av->av_time = ktime_get_real(); -out: - return 0; + if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { + DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); + av->av_overflow = true; + } + + av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets); + if (av->av_overflow) + av->av_buf_tail = av->av_buf_head; -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long)ackno); - return -EILSEQ; + av->av_buf[av->av_buf_head] = state; + av->av_buf_ackno = seqno; + + if (num_packets > 1) + dccp_ackvec_reserve_seats(av, num_packets - 1); } -static void dccp_ackvec_throw_record(struct dccp_ackvec *av, - struct dccp_ackvec_record *avr) +/** + * dccp_ackvec_input - Register incoming packet in the buffer + */ +void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) { - struct dccp_ackvec_record *next; + u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq; + enum dccp_ackvec_states state = DCCPAV_RECEIVED; - /* sort out vector length */ - if (av->av_buf_head <= avr->avr_ack_ptr) - av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; - else - av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 - - av->av_buf_head + avr->avr_ack_ptr; + if (dccp_ackvec_is_empty(av)) { + dccp_ackvec_add_new(av, 1, seqno, state); + av->av_tail_ackno = seqno; - /* free records */ - list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { - list_del_init(&avr->avr_node); - dccp_ackvec_record_delete(avr); - } -} + } else { + s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno); + u8 *current_head = av->av_buf + av->av_buf_head; -void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, - const u64 ackno) -{ - struct dccp_ackvec_record *avr; + if (num_packets == 1 && + dccp_ackvec_state(current_head) == state && + dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) { - /* - * If we traverse backwards, it should be faster when we have large - * windows. We will be receiving ACKs for stuff we sent a while back - * -sorbo. - */ - list_for_each_entry_reverse(avr, &av->av_records, avr_node) { - if (ackno == avr->avr_ack_seqno) { - dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - dccp_role(sk), 1, - (unsigned long long)avr->avr_ack_seqno, - (unsigned long long)avr->avr_ack_ackno); - dccp_ackvec_throw_record(av, avr); - break; - } else if (avr->avr_ack_seqno > ackno) - break; /* old news */ + *current_head += 1; + av->av_buf_ackno = seqno; + + } else if (num_packets > 0) { + dccp_ackvec_add_new(av, num_packets, seqno, state); + } else { + dccp_ackvec_update_old(av, num_packets, seqno, state); + } } } -static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, - struct sock *sk, u64 *ackno, - const unsigned char len, - const unsigned char *vector) +/** + * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection + * This routine is called when the peer acknowledges the receipt of Ack Vectors + * up to and including @ackno. While based on on section A.3 of RFC 4340, here + * are additional precautions to prevent corrupted buffer state. In particular, + * we use tail_ackno to identify outdated records; it always marks the earliest + * packet of group (2) in 11.4.2. + */ +void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno) { - unsigned char i; - struct dccp_ackvec_record *avr; + struct dccp_ackvec_record *avr, *next; + u8 runlen_now, eff_runlen; + s64 delta; - /* Check if we actually sent an ACK vector */ - if (list_empty(&av->av_records)) + avr = dccp_ackvec_lookup(&av->av_records, ackno); + if (avr == NULL) return; + /* + * Deal with outdated acknowledgments: this arises when e.g. there are + * several old records and the acks from the peer come in slowly. In + * that case we may still have records that pre-date tail_ackno. + */ + delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno); + if (delta < 0) + goto free_records; + /* + * Deal with overlapping Ack Vectors: don't subtract more than the + * number of packets between tail_ackno and ack_ackno. + */ + eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen; - i = len; + runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr); /* - * XXX - * I think it might be more efficient to work backwards. See comment on - * rcv_ackno. -sorbo. + * The run length of Ack Vector cells does not decrease over time. If + * the run length is the same as at the time the Ack Vector was sent, we + * free the ack_ptr cell. That cell can however not be freed if the run + * length has increased: in this case we need to move the tail pointer + * backwards (towards higher indices), to its next-oldest neighbour. */ - avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); - while (i--) { - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - u64 ackno_end_rl; + if (runlen_now > eff_runlen) { - dccp_set_seqno(&ackno_end_rl, *ackno - rl); + av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1; + av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1); + /* This move may not have cleared the overflow flag. */ + if (av->av_overflow) + av->av_overflow = (av->av_buf_head == av->av_buf_tail); + } else { + av->av_buf_tail = avr->avr_ack_ptr; /* - * If our AVR sequence number is greater than the ack, go - * forward in the AVR list until it is not so. + * We have made sure that avr points to a valid cell within the + * buffer. This cell is either older than head, or equals head + * (empty buffer): in both cases we no longer have any overflow. */ - list_for_each_entry_from(avr, &av->av_records, avr_node) { - if (!after48(avr->avr_ack_seqno, *ackno)) - goto found; - } - /* End of the av_records list, not found, exit */ - break; -found: - if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) { - const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; - if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { - dccp_pr_debug("%s ACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - dccp_role(sk), len, - (unsigned long long) - avr->avr_ack_seqno, - (unsigned long long) - avr->avr_ack_ackno); - dccp_ackvec_throw_record(av, avr); - break; - } - /* - * If it wasn't received, continue scanning... we might - * find another one. - */ - } + av->av_overflow = 0; + } - dccp_set_seqno(ackno, ackno_end_rl - 1); - ++vector; + /* + * The peer has acknowledged up to and including ack_ackno. Hence the + * first packet in group (2) of 11.4.2 is the successor of ack_ackno. + */ + av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1); + +free_records: + list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { + list_del(&avr->avr_node); + kmem_cache_free(dccp_ackvec_record_slab, avr); } } -int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - u64 *ackno, const u8 opt, const u8 *value, const u8 len) +/* + * Routines to keep track of Ack Vectors received in an skb + */ +int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce) { - if (len > DCCP_SINGLE_OPT_MAXLEN) - return -1; + struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC); + + if (new == NULL) + return -ENOBUFS; + new->vec = vec; + new->len = len; + new->nonce = nonce; - /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ - dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, - ackno, len, value); + list_add_tail(&new->node, head); return 0; } +EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add); + +void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks) +{ + struct dccp_ackvec_parsed *cur, *next; + + list_for_each_entry_safe(cur, next, parsed_chunks, node) + kfree(cur); + INIT_LIST_HEAD(parsed_chunks); +} +EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); int __init dccp_ackvec_init(void) { @@ -448,10 +379,9 @@ int __init dccp_ackvec_init(void) if (dccp_ackvec_slab == NULL) goto out_err; - dccp_ackvec_record_slab = - kmem_cache_create("dccp_ackvec_record", - sizeof(struct dccp_ackvec_record), - 0, SLAB_HWCACHE_ALIGN, NULL); + dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record", + sizeof(struct dccp_ackvec_record), + 0, SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_record_slab == NULL) goto out_destroy_slab; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 7ea557b7c6b1..e2ab0627a5ff 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -3,9 +3,9 @@ /* * net/dccp/ackvec.h * - * An implementation of the DCCP protocol + * An implementation of Ack Vectors for the DCCP protocol + * Copyright (c) 2007 University of Aberdeen, Scotland, UK * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -13,99 +13,124 @@ #include <linux/dccp.h> #include <linux/compiler.h> -#include <linux/ktime.h> #include <linux/list.h> #include <linux/types.h> -/* We can spread an ack vector across multiple options */ -#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) +/* + * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN, + * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1 + * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives + * more headroom if Ack Ratio is higher or when the sender acknowledges slowly. + * The maximum value is bounded by the u16 types for indices and functions. + */ +#define DCCPAV_NUM_ACKVECS 2 +#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS) /* Estimated minimum average Ack Vector length - used for updating MPS */ #define DCCPAV_MIN_OPTLEN 16 -#define DCCP_ACKVEC_STATE_RECEIVED 0 -#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) +/* Threshold for coping with large bursts of losses */ +#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8) -#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ +enum dccp_ackvec_states { + DCCPAV_RECEIVED = 0x00, + DCCPAV_ECN_MARKED = 0x40, + DCCPAV_RESERVED = 0x80, + DCCPAV_NOT_RECEIVED = 0xC0 +}; +#define DCCPAV_MAX_RUNLEN 0x3F -/** struct dccp_ackvec - ack vector - * - * This data structure is the one defined in RFC 4340, Appendix A. - * - * @av_buf_head - circular buffer head - * @av_buf_tail - circular buffer tail - * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %av_buf_head) - * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: +static inline u8 dccp_ackvec_runlen(const u8 *cell) +{ + return *cell & DCCPAV_MAX_RUNLEN; +} + +static inline u8 dccp_ackvec_state(const u8 *cell) +{ + return *cell & ~DCCPAV_MAX_RUNLEN; +} + +/** struct dccp_ackvec - Ack Vector main data structure * - * @av_records - list of dccp_ackvec_record - * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * This implements a fixed-size circular buffer within an array and is largely + * based on Appendix A of RFC 4340. * - * @av_time - the time in usecs - * @av_buf - circular buffer of acknowledgeable packets + * @av_buf: circular buffer storage area + * @av_buf_head: head index; begin of live portion in @av_buf + * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf + * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf + * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf + * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to + * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf + * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound + * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) */ struct dccp_ackvec { - u64 av_buf_ackno; - struct list_head av_records; - ktime_t av_time; + u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; u16 av_buf_head; - u16 av_vec_len; - u8 av_buf_nonce; - u8 av_ack_nonce; - u8 av_buf[DCCP_MAX_ACKVEC_LEN]; + u16 av_buf_tail; + u64 av_buf_ackno:48; + u64 av_tail_ackno:48; + bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; + u8 av_overflow:1; + struct list_head av_records; }; -/** struct dccp_ackvec_record - ack vector record +/** struct dccp_ackvec_record - Records information about sent Ack Vectors * - * ACK vector record as defined in Appendix A of spec. + * These list entries define the additional information which the HC-Receiver + * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A. * - * The list is sorted by avr_ack_seqno + * @avr_node: the list node in @av_records + * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on + * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to + * @avr_ack_ptr: pointer into @av_buf where this record starts + * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending + * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent * - * @avr_node - node in av_records - * @avr_ack_seqno - sequence number of the packet this record was sent on - * @avr_ack_ackno - sequence number being acknowledged - * @avr_ack_ptr - pointer into av_buf where this record starts - * @avr_ack_nonce - av_ack_nonce at the time this record was sent - * @avr_sent_len - lenght of the record in av_buf + * The list as a whole is sorted in descending order by @avr_ack_seqno. */ struct dccp_ackvec_record { struct list_head avr_node; - u64 avr_ack_seqno; - u64 avr_ack_ackno; + u64 avr_ack_seqno:48; + u64 avr_ack_ackno:48; u16 avr_ack_ptr; - u16 avr_sent_len; - u8 avr_ack_nonce; + u8 avr_ack_runlen; + u8 avr_ack_nonce:1; }; -struct sock; -struct sk_buff; - extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); extern void dccp_ackvec_free(struct dccp_ackvec *av); -extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state); - -extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno); -extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - u64 *ackno, const u8 opt, - const u8 *value, const u8 len); +extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); +extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); +extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); +extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); -extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) { - return av->av_vec_len; + return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; } + +/** + * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb + * @vec: start of vector (offset into skb) + * @len: length of @vec + * @nonce: whether @vec had an ECN nonce of 0 or 1 + * @node: FIFO - arranged in descending order of ack_ackno + * This structure is used by CCIDs to access Ack Vectors in a received skb. + */ +struct dccp_ackvec_parsed { + u8 *vec, + len, + nonce:1; + struct list_head node; +}; + +extern int dccp_ackvec_parsed_add(struct list_head *head, + u8 *vec, u8 len, u8 nonce); +extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 6576eae9e779..e96d5e810039 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) #endif } -/* XXX Lame code duplication! - * returns -1 if none was found. - * else returns the next offset to use in the function call. - */ -static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, - unsigned char **vec, unsigned char *veclen) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); - unsigned char *opt_ptr; - const unsigned char *opt_end = (unsigned char *)dh + - (dh->dccph_doff * 4); - unsigned char opt, len; - unsigned char *value; - - BUG_ON(offset < 0); - options += offset; - opt_ptr = options; - if (opt_ptr >= opt_end) - return -1; - - while (opt_ptr != opt_end) { - opt = *opt_ptr++; - len = 0; - value = NULL; - - /* Check if this isn't a single byte option */ - if (opt > DCCPO_MAX_RESERVED) { - if (opt_ptr == opt_end) - goto out_invalid_option; - - len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; - /* - * Remove the type and len fields, leaving - * just the value size - */ - len -= 2; - value = opt_ptr; - opt_ptr += len; - - if (opt_ptr > opt_end) - goto out_invalid_option; - } - - switch (opt) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - *vec = value; - *veclen = len; - return offset + (opt_ptr - options); - } - } - - return -1; - -out_invalid_option: - DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); - return -1; -} - /** * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm * This code is almost identical with TCP's tcp_rtt_estimator(), since @@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); } +static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, + u8 option, u8 *optval, u8 optlen) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + + switch (option) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, + option - DCCPO_ACK_VECTOR_0); + } + return 0; +} + static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); + struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; - unsigned char *vector; - unsigned char veclen; - int offset = 0; int done = 0; unsigned int maxincr = 0; @@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* check forward path congestion */ - /* still didn't send out new data packets */ - if (hc->tx_seqh == hc->tx_seqt) + if (dccp_packet_without_ack(skb)) return; - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_ACK: - case DCCP_PKT_DATAACK: - break; - default: - return; - } + /* still didn't send out new data packets */ + if (hc->tx_seqh == hc->tx_seqt) + goto done; ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; if (after48(ackno, hc->tx_high_ack)) @@ -509,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ - while ((offset = ccid2_ackvector(sk, skb, offset, - &vector, &veclen)) != -1) { + list_for_each_entry(avp, &hc->tx_av_chunks, node) { /* go through this ack vector */ - while (veclen--) { - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - u64 ackno_end_rl = SUB48(ackno, rl); + for (; avp->len--; avp->vec++) { + u64 ackno_end_rl = SUB48(ackno, + dccp_ackvec_runlen(avp->vec)); - ccid2_pr_debug("ackvec start:%llu end:%llu\n", + ccid2_pr_debug("ackvec %llu |%u,%u|\n", (unsigned long long)ackno, - (unsigned long long)ackno_end_rl); + dccp_ackvec_state(avp->vec) >> 6, + dccp_ackvec_runlen(avp->vec)); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. @@ -537,17 +482,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = *vector & - DCCP_ACKVEC_STATE_MASK; + const u8 state = dccp_ackvec_state(avp->vec); /* new packet received or marked */ - if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (state != DCCPAV_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == - DCCP_ACKVEC_STATE_ECN_MARKED) { + if (state == DCCPAV_ECN_MARKED) ccid2_congestion_event(sk, seqp); - } else + else ccid2_new_ack(sk, seqp, &maxincr); @@ -566,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) break; ackno = SUB48(ackno_end_rl, 1); - vector++; } if (done) break; @@ -634,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) sk_stop_timer(sk, &hc->tx_rtotimer); else sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); - +done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) @@ -666,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_last_cong = ccid2_time_stamp; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); + INIT_LIST_HEAD(&hc->tx_av_chunks); return 0; } @@ -699,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } struct ccid_operations ccid2_ops = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, + .ccid_id = DCCPC_CCID2, + .ccid_name = "TCP-like", + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 25cb6b216eda..e9985dafc2c7 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -55,6 +55,7 @@ struct ccid2_seq { * @tx_rtt_seq: to decay RTTVAR at most once per flight * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq + * @tx_av_chunks: list of Ack Vectors received on current skb */ struct ccid2_hc_tx_sock { u32 tx_cwnd; @@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock { int tx_rpdupack; u32 tx_last_cong; u64 tx_high_ack; + struct list_head tx_av_chunks; }; static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a8ed459508b2..48ad5d9da7cb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); +/* + * TX Packet Dequeueing Interface + */ +extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +extern bool dccp_qpolicy_full(struct sock *sk); +extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); +extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); +extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); + +/* + * TX Packet Output and TX Timers + */ extern void dccp_write_xmit(struct sock *sk); extern void dccp_write_space(struct sock *sk); extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); @@ -457,12 +470,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) dp->dccps_awh = dp->dccps_gss; } +static inline int dccp_ackvec_pending(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL && + !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec); +} + static inline int dccp_ack_pending(const struct sock *sk) { - const struct dccp_sock *dp = dccp_sk(sk); - return (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || - inet_csk_ack_scheduled(sk); + return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } extern int dccp_feat_finalise_settings(struct dccp_sock *dp); diff --git a/net/dccp/input.c b/net/dccp/input.c index e424a09e83f6..15af247ea007 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } -static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) +static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; - if (dp->dccps_hc_rx_ackvec != NULL) - dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + if (av == NULL) + return; + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_input(av, skb); } static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) @@ -366,22 +368,13 @@ discard: int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len) { - struct dccp_sock *dp = dccp_sk(sk); - if (dccp_check_seqno(sk, skb)) goto discard; if (dccp_parse_options(sk, NULL, skb)) return 1; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_event_ack_recv(sk, skb); - - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); return __dccp_rcv_established(sk, skb, dh, len); @@ -633,15 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, NULL, skb)) return 1; - if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_event_ack_recv(sk, skb); - - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; - + dccp_handle_ackvec_processing(sk, skb); dccp_deliver_input_to_ccids(sk, skb); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f69ea114829..45a434f94169 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, { struct rtable *rt; struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .nl_u = { .ip4_u = - { .daddr = ip_hdr(skb)->saddr, - .saddr = ip_hdr(skb)->daddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ip_hdr(skb)->saddr, + .fl4_src = ip_hdr(skb)->daddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } - } + .fl_ip_sport = dccp_hdr(skb)->dccph_dport, + .fl_ip_dport = dccp_hdr(skb)->dccph_sport }; security_skb_classify_flow(skb, &fl); diff --git a/net/dccp/options.c b/net/dccp/options.c index cd3061813009..f06ffcfc8d71 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct dccp_sock *dp = dccp_sk(sk); const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; - u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, if (rc) goto out_featneg_failed; break; - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ - break; - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) - goto out_invalid_option; - break; case DCCPO_TIMESTAMP: if (len != 4) goto out_invalid_option; @@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, pkt_type, opt, value, len)) goto out_invalid_option; break; + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ + break; + /* + * Ack vectors are processed by the TX CCID if it is + * interested. The RX CCID need not parse Ack Vectors, + * since it is only interested in clearing old state. + * Fall through. + */ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) @@ -340,6 +341,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } +/* FIXME: This function is currently not used anywhere */ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) { const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); @@ -424,6 +426,83 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } +static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const u16 buflen = dccp_ackvec_buflen(av); + /* Figure out how many options do we need to represent the ackvec */ + const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); + u16 len = buflen + 2 * nr_opts; + u8 i, nonce = 0; + const unsigned char *tail, *from; + unsigned char *to; + + if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, + dccp_packet_name(dcb->dccpd_type)); + return -1; + } + /* + * Since Ack Vectors are variable-length, we can not always predict + * their size. To catch exception cases where the space is running out + * on the skb, a separate Sync is scheduled to carry the Ack Vector. + */ + if (len > DCCPAV_MIN_OPTLEN && + len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { + DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " + "MPS=%u ==> reduce payload size?\n", len, skb->len, + dcb->dccpd_opt_len, dp->dccps_mss_cache); + dp->dccps_sync_scheduled = 1; + return 0; + } + dcb->dccpd_opt_len += len; + + to = skb_push(skb, len); + len = buflen; + from = av->av_buf + av->av_buf_head; + tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; + + for (i = 0; i < nr_opts; ++i) { + int copylen = len; + + if (len > DCCP_SINGLE_OPT_MAXLEN) + copylen = DCCP_SINGLE_OPT_MAXLEN; + + /* + * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via + * its type; ack_nonce is the sum of all individual buf_nonce's. + */ + nonce ^= av->av_buf_nonce[i]; + + *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; + *to++ = copylen + 2; + + /* Check if buf_head wraps */ + if (from + copylen > tail) { + const u16 tailsize = tail - from; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + copylen -= tailsize; + from = av->av_buf; + } + + memcpy(to, from, copylen); + from += copylen; + to += copylen; + len -= copylen; + } + /* + * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. + */ + if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) + return -ENOBUFS; + return 0; +} + /** * dccp_insert_option_mandatory - Mandatory option (5.8.2) * Note that since we are using skb_push, this function needs to be called @@ -519,8 +598,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (dccp_insert_option_timestamp(skb)) return -1; - } else if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && + } else if (dccp_ackvec_pending(sk) && dccp_insert_option_ackvec(sk, skb)) { return -1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index 45b91853f5ae..784d30210543 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk) { int err, len; struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); + struct sk_buff *skb = dccp_qpolicy_pop(sk); if (unlikely(skb == NULL)) return; @@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk) * any local drop will eventually be reported via receiver feedback. */ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); + + /* + * If the CCID needs to transfer additional header options out-of-band + * (e.g. Ack Vectors or feature-negotiation options), it activates this + * flag to schedule a Sync. The Sync will automatically incorporate all + * currently pending header options, thus clearing the backlog. + */ + if (dp->dccps_sync_scheduled) + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /** @@ -336,7 +345,7 @@ void dccp_write_xmit(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - while ((skb = skb_peek(&sk->sk_write_queue))) { + while ((skb = dccp_qpolicy_top(sk))) { int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); switch (ccid_packet_dequeue_eval(rc)) { @@ -350,8 +359,7 @@ void dccp_write_xmit(struct sock *sk) dccp_xmit_packet(sk); break; case CCID_PACKET_ERR: - skb_dequeue(&sk->sk_write_queue); - kfree_skb(skb); + dccp_qpolicy_drop(sk, skb); dccp_pr_debug("packet discarded due to err=%d\n", rc); } } @@ -636,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; + /* + * Clear the flag in case the Sync was scheduled for out-of-band data, + * such as carrying a long Ack Vector. + */ + dccp_sk(sk)->dccps_sync_scheduled = 0; + dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fcea..152975d942d9 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; + dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); @@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: err = dccp_setsockopt_cscov(sk, val, true); break; + case DCCP_SOCKOPT_QPOLICY_ID: + if (sk->sk_state != DCCP_CLOSED) + err = -EISCONN; + else if (val < 0 || val >= DCCPQ_POLICY_MAX) + err = -EINVAL; + else + dp->dccps_qpolicy = val; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + if (val < 0) + err = -EINVAL; + else + dp->dccps_tx_qlen = val; + break; default: err = -ENOPROTOOPT; break; @@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; + case DCCP_SOCKOPT_QPOLICY_ID: + val = dp->dccps_qpolicy; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + val = dp->dccps_tx_qlen; + break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif +static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) +{ + struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + + /* + * Assign an (opaque) qpolicy priority value to skb->priority. + * + * We are overloading this skb field for use with the qpolicy subystem. + * The skb->priority is normally used for the SO_PRIORITY option, which + * is initialised from sk_priority. Since the assignment of sk_priority + * to skb->priority happens later (on layer 3), we overload this field + * for use with queueing priorities as long as the skb is on layer 4. + * The default priority value (if nothing is set) is 0. + */ + skb->priority = 0; + + for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_DCCP) + continue; + + if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && + !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) + return -EINVAL; + + switch (cmsg->cmsg_type) { + case DCCP_SCM_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) + return -EINVAL; + skb->priority = *(__u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - if (sysctl_dccp_tx_qlen && - (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_release; } @@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - skb_queue_tail(&sk->sk_write_queue, skb); + rc = dccp_msghdr_parse(msg, skb); + if (rc != 0) + goto out_discard; + + dccp_qpolicy_push(sk, skb); /* * The xmit_timer is set if the TX CCID is rate-based and will expire * when congestion control permits to release further packets into the diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 000000000000..63c30bfa4703 --- /dev/null +++ b/net/dccp/qpolicy.c @@ -0,0 +1,137 @@ +/* + * net/dccp/qpolicy.c + * + * Policy-based packet dequeueing interface for DCCP. + * + * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License v2 + * as published by the Free Software Foundation. + */ +#include "dccp.h" + +/* + * Simple Dequeueing Policy: + * If tx_qlen is different from 0, enqueue up to tx_qlen elements. + */ +static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) +{ + skb_queue_tail(&sk->sk_write_queue, skb); +} + +static bool qpolicy_simple_full(struct sock *sk) +{ + return dccp_sk(sk)->dccps_tx_qlen && + sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; +} + +static struct sk_buff *qpolicy_simple_top(struct sock *sk) +{ + return skb_peek(&sk->sk_write_queue); +} + +/* + * Priority-based Dequeueing Policy: + * If tx_qlen is different from 0 and the queue has reached its upper bound + * of tx_qlen elements, replace older packets lowest-priority-first. + */ +static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) +{ + struct sk_buff *skb, *best = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (best == NULL || skb->priority > best->priority) + best = skb; + return best; +} + +static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) +{ + struct sk_buff *skb, *worst = NULL; + + skb_queue_walk(&sk->sk_write_queue, skb) + if (worst == NULL || skb->priority < worst->priority) + worst = skb; + return worst; +} + +static bool qpolicy_prio_full(struct sock *sk) +{ + if (qpolicy_simple_full(sk)) + dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); + return false; +} + +/** + * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface + * @push: add a new @skb to the write queue + * @full: indicates that no more packets will be admitted + * @top: peeks at whatever the queueing policy defines as its `top' + */ +static struct dccp_qpolicy_operations { + void (*push) (struct sock *sk, struct sk_buff *skb); + bool (*full) (struct sock *sk); + struct sk_buff* (*top) (struct sock *sk); + __be32 params; + +} qpol_table[DCCPQ_POLICY_MAX] = { + [DCCPQ_POLICY_SIMPLE] = { + .push = qpolicy_simple_push, + .full = qpolicy_simple_full, + .top = qpolicy_simple_top, + .params = 0, + }, + [DCCPQ_POLICY_PRIO] = { + .push = qpolicy_simple_push, + .full = qpolicy_prio_full, + .top = qpolicy_prio_best_skb, + .params = DCCP_SCM_PRIORITY, + }, +}; + +/* + * Externally visible interface + */ +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) +{ + qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); +} + +bool dccp_qpolicy_full(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); +} + +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) +{ + if (skb != NULL) { + skb_unlink(skb, &sk->sk_write_queue); + kfree_skb(skb); + } +} + +struct sk_buff *dccp_qpolicy_top(struct sock *sk) +{ + return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); +} + +struct sk_buff *dccp_qpolicy_pop(struct sock *sk) +{ + struct sk_buff *skb = dccp_qpolicy_top(sk); + + if (skb != NULL) { + /* Clear any skb fields that we used internally */ + skb->priority = 0; + skb_unlink(skb, &sk->sk_write_queue); + } + return skb; +} + +bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) +{ + /* check if exactly one bit is set */ + if (!param || (param & (param - 1))) + return false; + return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; +} diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 6f97268ed85f..0065e7e14af4 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1850,7 +1850,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu) { unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER; if (dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); mtu -= LL_RESERVED_SPACE(dev); if (dn_db->use_long) mtu -= 21; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4c409b46aa35..0ba15633c418 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, if (table->extra1 == NULL) return -EINVAL; - dn_db = dev->dn_ptr; + dn_db = rcu_dereference_raw(dev->dn_ptr); old = dn_db->parms.forwarding; err = proc_dointvec(table, write, buffer, lenp, ppos); @@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void) return ifa; } -static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa) +static void dn_dev_free_ifa_rcu(struct rcu_head *head) { - kfree(ifa); + kfree(container_of(head, struct dn_ifaddr, rcu)); } -static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy) +static void dn_dev_free_ifa(struct dn_ifaddr *ifa) { - struct dn_ifaddr *ifa1 = *ifap; + call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu); +} + +static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) +{ + struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap); unsigned char mac_addr[6]; struct net_device *dev = dn_db->dev; @@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Check for duplicates */ - for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + for (ifa1 = rtnl_dereference(dn_db->ifa_list); + ifa1 != NULL; + ifa1 = rtnl_dereference(ifa1->ifa_next)) { if (ifa1->ifa_local == ifa->ifa_local) return -EEXIST; } @@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - dn_db->ifa_list = ifa; + rcu_assign_pointer(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); int rv; if (dn_db == NULL) { @@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; struct dn_dev *dn_db; struct net_device *dev; - struct dn_ifaddr *ifa = NULL, **ifap = NULL; + struct dn_ifaddr *ifa = NULL; + struct dn_ifaddr __rcu **ifap = NULL; int ret = 0; if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) @@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) goto done; } - if ((dn_db = dev->dn_ptr) != NULL) { - for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) break; } @@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) dev = __dev_get_by_index(&init_net, ifindex); if (dev) - dn_dev = dev->dn_ptr; + dn_dev = rtnl_dereference(dev->dn_ptr); return dn_dev; } @@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa, **ifap; + struct dn_ifaddr *ifa; + struct dn_ifaddr __rcu **ifap; int err = -EINVAL; if (!net_eq(net, &init_net)) @@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto errout; err = -EADDRNOTAVAIL; - for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { + for (ifap = &dn_db->ifa_list; + (ifa = rtnl_dereference(*ifap)) != NULL; + ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) continue; @@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; - if ((dn_db = dev->dn_ptr) == NULL) { + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) { dn_db = dn_dev_create(dev, &err); if (!dn_db) return err; @@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = 0; } - if ((dn_db = dev->dn_ptr) == NULL) + if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) goto cont; - for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; - ifa = ifa->ifa_next, dn_idx++) { + for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; + ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { if (dn_idx < skip_naddr) continue; @@ -773,21 +786,22 @@ done: static int dn_dev_get_first(struct net_device *dev, __le16 *addr) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int rv = -ENODEV; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) goto out; - rtnl_lock(); - ifa = dn_db->ifa_list; + ifa = rcu_dereference(dn_db->ifa_list); if (ifa != NULL) { *addr = ifa->ifa_local; rv = 0; } - rtnl_unlock(); out: + rcu_read_unlock(); return rv; } @@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) struct endnode_hello_message *msg; struct sk_buff *skb = NULL; __le16 *pktlen; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) return; @@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) { int n; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; struct sk_buff *skb; size_t size; @@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) { - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dn_send_endnode_hello(dev, ifa); @@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa) static int dn_eth_up(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_add(dev, dn_rt_all_end_mcast); @@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev) static void dn_eth_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.forwarding == 0) dev_mc_del(dev, dn_rt_all_end_mcast); @@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev); static void dn_dev_timer_func(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db->t3 <= dn_db->parms.t2) { if (dn_db->parms.timer3) { - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa; + ifa = rcu_dereference(ifa->ifa_next)) { if (!(ifa->ifa_flags & IFA_F_SECONDARY)) dn_db->parms.timer3(dev, ifa); } @@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg) } else { dn_db->t3 -= dn_db->parms.t2; } - + rcu_read_unlock(); dn_dev_set_timer(dev); } static void dn_dev_set_timer(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); if (dn_db->parms.t2 > dn_db->parms.t3) dn_db->parms.t2 = dn_db->parms.t3; @@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) return NULL; memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - smp_wmb(); - dev->dn_ptr = dn_db; + + rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - dev->dn_ptr = NULL; + rcu_assign_pointer(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } @@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev) struct dn_ifaddr *ifa; __le16 addr = decnet_address; int maybe_default = 0; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) return; @@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev) static void dn_dev_delete(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); if (dn_db == NULL) return; @@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev) void dn_dev_down(struct net_device *dev) { - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); struct dn_ifaddr *ifa; if (dn_db == NULL) return; - while((ifa = dn_db->ifa_list) != NULL) { + while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) { dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); dn_dev_free_ifa(ifa); } @@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev) } static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) + __acquires(RCU) { int i; struct net_device *dev; @@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(rcu) + __releases(RCU) { rcu_read_unlock(); } @@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) struct net_device *dev = v; char peer_buf[DN_ASCBUF_LEN]; char router_buf[DN_ASCBUF_LEN]; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr); seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" " %04hu %03d %02x %-10s %-7s %-7s\n", diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 4ab96c15166d..0ef0a81bcd72 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - dn_db = dev->dn_ptr; + dn_db = rcu_dereference(dev->dn_ptr); if (dn_db == NULL) continue; - for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) { + for (ifa2 = rcu_dereference(dn_db->ifa_list); + ifa2 != NULL; + ifa2 = rcu_dereference(ifa2->ifa_next)) { if (ifa2->ifa_local == ifa->ifa_local) { found_it = 1; break; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index a085dbcf5c7f..602dade7e9a3 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) write_lock(&neigh->lock); neigh->used = jiffies; - dn_db = (struct dn_dev *)neigh->dev->dn_ptr; + dn_db = rcu_dereference(neigh->dev->dn_ptr); if (!(neigh->nud_state & NUD_PERMANENT)) { neigh->updated = jiffies; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index df0f3e54ff8a..8280e43c8861 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -93,7 +93,7 @@ struct dn_rt_hash_bucket { - struct dn_route *chain; + struct dn_route __rcu *chain; spinlock_t lock; }; @@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt) static void dn_dst_check_expire(unsigned long dummy) { int i; - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; unsigned long now = jiffies; unsigned long expire = 120 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { rtp = &dn_rt_hash_table[i].chain; spin_lock(&dn_rt_hash_table[i].lock); - while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy) static int dn_dst_gc(struct dst_ops *ops) { - struct dn_route *rt, **rtp; + struct dn_route *rt; + struct dn_route __rcu **rtp; int i; unsigned long now = jiffies; unsigned long expire = 10 * HZ; - for(i = 0; i <= dn_rt_hash_mask; i++) { + for (i = 0; i <= dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); rtp = &dn_rt_hash_table[i].chain; - while((rt=*rtp) != NULL) { + while ((rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { if (atomic_read(&rt->dst.__refcnt) || (now - rt->dst.lastuse) < expire) { rtp = &rt->dst.dn_next; @@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { u32 min_mtu = 230; struct dn_dev *dn = dst->neighbour ? - (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL; + rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -267,23 +271,25 @@ static void dn_dst_link_failure(struct sk_buff *skb) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | - (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | + return ((fl1->fld_dst ^ fl2->fld_dst) | + (fl1->fld_src ^ fl2->fld_src) | (fl1->mark ^ fl2->mark) | - (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | + (fl1->fld_scope ^ fl2->fld_scope) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) { - struct dn_route *rth, **rthp; + struct dn_route *rth; + struct dn_route __rcu **rthp; unsigned long now = jiffies; rthp = &dn_rt_hash_table[hash].chain; spin_lock_bh(&dn_rt_hash_table[hash].lock); - while((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ *rthp = rth->dst.dn_next; @@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy) int i; struct dn_route *rt, *next; - for(i = 0; i < dn_rt_hash_mask; i++) { + for (i = 0; i < dn_rt_hash_mask; i++) { spin_lock_bh(&dn_rt_hash_table[i].lock); - if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL) + if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL) goto nothing_to_declare; - for(; rt; rt=next) { - next = rt->dst.dn_next; - rt->dst.dn_next = NULL; + for(; rt; rt = next) { + next = rcu_dereference_raw(rt->dst.dn_next); + RCU_INIT_POINTER(rt->dst.dn_next, NULL); dst_free((struct dst_entry *)rt); } @@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb) */ static int dn_route_rx_packet(struct sk_buff *skb) { - struct dn_skb_cb *cb = DN_SKB_CB(skb); + struct dn_skb_cb *cb; int err; if ((err = dn_route_input(skb)) == 0) return dst_input(skb); + cb = DN_SKB_CB(skb); if (decnet_debug_level & 4) { char *devname = skb->dev ? skb->dev->name : "???"; - struct dn_skb_cb *cb = DN_SKB_CB(skb); + printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, @@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type struct dn_skb_cb *cb; unsigned char flags = 0; __u16 len = le16_to_cpu(*(__le16 *)skb->data); - struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn = rcu_dereference(dev->dn_ptr); unsigned char padlen = 0; if (!net_eq(dev_net(dev), &init_net)) @@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct dst_entry *dst = skb_dst(skb); - struct dn_dev *dn_db = dst->dev->dn_ptr; + struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; struct neighbour *neigh = dst->neighbour; int header_len; @@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2) static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) { __le16 saddr = 0; - struct dn_dev *dn_db = dev->dn_ptr; + struct dn_dev *dn_db; struct dn_ifaddr *ifa; int best_match = 0; int ret; - read_lock(&dev_base_lock); - for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); + for (ifa = rcu_dereference(dn_db->ifa_list); + ifa != NULL; + ifa = rcu_dereference(ifa->ifa_next)) { if (ifa->ifa_scope > scope) continue; if (!daddr) { @@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int if (best_match == 0) saddr = ifa->ifa_local; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return saddr; } @@ -872,11 +882,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = oldflp->fld_dst, - .saddr = oldflp->fld_src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = oldflp->fld_dst, + .fld_src = oldflp->fld_src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = oldflp->mark, .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; @@ -1020,7 +1028,7 @@ source_ok: err = -ENODEV; if (dev_out == NULL) goto out; - dn_db = dev_out->dn_ptr; + dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); @@ -1171,7 +1179,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && - (rt->fl.iif == 0) && + dn_is_output_route(rt) && (rt->fl.oif == flp->oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); @@ -1220,11 +1228,9 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = cb->dst, - .saddr = cb->src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = cb->dst, + .fld_src = cb->src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = skb->dev->ifindex }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; @@ -1233,7 +1239,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(in_dev); - if ((dn_db = in_dev->dn_ptr) == NULL) + if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL) goto out; /* Zero source addresses are not allowed */ @@ -1502,7 +1508,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, rt->dst.error) < 0) goto rtattr_failure; - if (rt->fl.iif) + if (dn_is_input_route(rt)) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -1677,15 +1683,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->dst.dn_next; - while(!rt) { + rt = rcu_dereference_bh(rt->dst.dn_next); + while (!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) break; rcu_read_lock_bh(); - rt = dn_rt_hash_table[s->bucket].chain; + rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); } - return rcu_dereference_bh(rt); + return rt; } static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 48fdf10be7a1..6eb91df3c550 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; + struct flowi fl = { .fld_dst = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile index c0ef4e71dc49..d5c13c2eb36d 100644 --- a/net/dns_resolver/Makefile +++ b/net/dns_resolver/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o -dns_resolver-objs := dns_key.o dns_query.o +dns_resolver-y := dns_key.o dns_query.o diff --git a/net/econet/Makefile b/net/econet/Makefile index 39f0a77abdbd..05fae8be2fed 100644 --- a/net/econet/Makefile +++ b/net/econet/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECONET) += econet.o -econet-objs := af_econet.o +econet-y := af_econet.o diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 93c91b633a56..6df6ecf49708 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net, switch (addr->addr_type) { case IEEE802154_ADDR_LONG: - rtnl_lock(); - dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr); if (dev) dev_hold(dev); - rtnl_unlock(); + rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: if (addr->pan_id == 0xffff || diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f581f77d1097..f2b61107df6c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk) struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk), - }, - }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { - .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport, - }, - }, + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport, }; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d8e540c5b071..a2fc7b961dbc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, - .saddr = tip } } }; + struct flowi fl = { .fl4_dst = sip, + .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ @@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb) dont_send = arp_ignore(in_dev, sip, tip); if (!dont_send && IN_DEV_ARPFILTER(in_dev)) - dont_send |= arp_filter(sip, tip, dev); + dont_send = arp_filter(sip, tip, dev); if (!dont_send) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) { @@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rcu(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); return 0; } return -ENXIO; } +/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; @@ -1061,8 +1062,8 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 0) @@ -1169,8 +1170,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 0) @@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rtnl_lock(); + rcu_read_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = __dev_get_by_name(net, r.arp_dev); + dev = dev_get_by_name_rcu(net, r.arp_dev); if (dev == NULL) goto out; @@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCGARP: err = arp_req_get(&r, dev); - if (!err && copy_to_user(arg, &r, sizeof(r))) - err = -EFAULT; break; } out: - rtnl_unlock(); + rcu_read_unlock(); + if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dc94b0316b78..3b067704ab38 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1256,6 +1256,87 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } +static size_t inet_get_link_af_size(const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + + if (!in_dev) + return 0; + + return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ +} + +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct nlattr *nla; + int i; + + if (!in_dev) + return -ENODATA; + + nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); + if (nla == NULL) + return -EMSGSIZE; + + for (i = 0; i < IPV4_DEVCONF_MAX; i++) + ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + + return 0; +} + +static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { + [IFLA_INET_CONF] = { .type = NLA_NESTED }, +}; + +static int inet_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int err, rem; + + if (dev && !__in_dev_get_rtnl(dev)) + return -EAFNOSUPPORT; + + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + if (err < 0) + return err; + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { + int cfgid = nla_type(a); + + if (nla_len(a) < 4) + return -EINVAL; + + if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) + return -EINVAL; + } + } + + return 0; +} + +static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int rem; + + if (!in_dev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) + BUG(); + + if (tb[IFLA_INET_CONF]) { + nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) + ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); + } + + return 0; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1619,6 +1700,14 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; +static struct rtnl_af_ops inet_af_ops = { + .family = AF_INET, + .fill_link_af = inet_fill_link_af, + .get_link_af_size = inet_get_link_af_size, + .validate_link_af = inet_validate_link_af, + .set_link_af = inet_set_link_af, +}; + void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); @@ -1626,6 +1715,8 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); + rtnl_af_register(&inet_af_ops); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..d3a1112b9d9c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -158,11 +158,7 @@ static void fib_flush(struct net *net) struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = addr - } - }, + .fl4_dst = addr, .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res = { 0 }; @@ -193,7 +189,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { .fl4_dst = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -247,13 +243,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, { struct in_device *in_dev; struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = src, - .saddr = dst, - .tos = tos - } - }, + .fl4_dst = src, + .fl4_src = dst, + .fl4_tos = tos, .mark = mark, .iif = oif }; @@ -853,13 +845,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) struct fib_result res; struct flowi fl = { .mark = frn->fl_mark, - .nl_u = { - .ip4_u = { - .daddr = frn->fl_addr, - .tos = frn->fl_tos, - .scope = frn->fl_scope - } - } + .fl4_dst = frn->fl_addr, + .fl4_tos = frn->fl_tos, + .fl4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3e0da3ef6116..12d3dc3df1b7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, rcu_read_lock(); { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = nh->nh_gw, - .scope = cfg->fc_scope + 1, - }, - }, + .fl4_dst = nh->nh_gw, + .fl4_scope = cfg->fc_scope + 1, .oif = nh->nh_oif, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5d1a44bcbdf..4aa1b7f01ea0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, + struct flowi fl = { .fl4_dst= daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(net, &rt, &fl)) @@ -506,8 +505,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct net_device *dev = NULL; rcu_read_lock(); - if (rt->fl.iif && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + if (rt_is_input_route(rt) && + net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) dev = dev_get_by_index_rcu(net, rt->fl.iif); if (dev) @@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : - iph->saddr, - .saddr = saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = icmp_param.replyopts.srr ? + icmp_param.replyopts.faddr : iph->saddr, + .fl4_src = saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_ICMP, - .uli_u = { - .icmpt = { - .type = type, - .code = code - } - } + .fl_icmp_type = type, + .fl_icmp_code = code, }; int err; struct rtable *rt2; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3c53c2d89e3b..e0e77e297de3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -149,21 +149,37 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_mc_list, rcu)); +} + static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + call_rcu(&im->rcu, ip_mc_list_reclaim); } } +#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + #ifdef CONFIG_IP_MULTICAST /* * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) @@ -284,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) + static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) { struct sk_buff *skb; @@ -292,14 +310,20 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct igmpv3_report *pig; struct net *net = dev_net(dev); - skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); - if (skb == NULL) - return NULL; + while (1) { + skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), + GFP_ATOMIC | __GFP_NOWARN); + if (skb) + break; + size >>= 1; + if (size < 256) + return NULL; + } + igmp_skb_size(skb) = size; { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { - .daddr = IGMPV3_ALL_MCR } }, + .fl4_dst = IGMPV3_ALL_MCR, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); @@ -384,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ +#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ skb_tailroom(skb)) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, @@ -502,8 +526,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +538,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -556,7 +580,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ @@ -593,7 +617,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +640,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -644,7 +668,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { .daddr = dst } }, + .fl4_dst = dst, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) return -1; @@ -813,14 +837,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -906,8 +930,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -925,7 +949,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } /* called in rcu_read_lock() section */ @@ -961,7 +985,7 @@ int igmp_rcv(struct sk_buff *skb) case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? */ - if (skb_rtable(skb)->fl.iif == 0) + if (rt_is_output_route(skb_rtable(skb))) break; /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || @@ -1110,8 +1134,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); @@ -1123,7 +1147,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1209,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1217,7 +1241,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; @@ -1227,26 +1251,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next = in_dev->mc_list; - in_dev->mc_list = im; + + im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - write_unlock_bh(&in_dev->mc_list_lock); + rcu_assign_pointer(in_dev->mc_list, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1260,26 +1276,32 @@ EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. + * Called with rcu_read_lock() */ -void ip_mc_rejoin_group(struct ip_mc_list *im) +void ip_mc_rejoin_groups(struct in_device *in_dev) { #ifdef CONFIG_IP_MULTICAST - struct in_device *in_dev = im->interface; + struct ip_mc_list *im; + int type; - if (im->multiaddr == IGMP_ALL_HOSTS) - return; + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == IGMP_ALL_HOSTS) + continue; - /* a failover is happening and switches - * must be notified immediately */ - if (IGMP_V1_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); - else if (IGMP_V2_SEEN(in_dev)) - igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); - else - igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); + /* a failover is happening and switches + * must be notified immediately + */ + if (IGMP_V1_SEEN(in_dev)) + type = IGMP_HOST_MEMBERSHIP_REPORT; + else if (IGMP_V2_SEEN(in_dev)) + type = IGMPV2_HOST_MEMBERSHIP_REPORT; + else + type = IGMPV3_HOST_MEMBERSHIP_REPORT; + igmp_send_report(in_dev, im, type); + } #endif } -EXPORT_SYMBOL(ip_mc_rejoin_group); +EXPORT_SYMBOL(ip_mc_rejoin_groups); /* * A socket has left a multicast group on device dev @@ -1287,17 +1309,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group); void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; ASSERT_RTNL(); - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; + *ip = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1316,34 +1339,34 @@ EXPORT_SYMBOL(ip_mc_dec_group); void ip_mc_unmap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1374,7 +1397,6 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1382,14 +1404,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1405,24 +1427,19 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); + igmp_group_dropped(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; + struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1513,18 +1530,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1685,18 +1702,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1793,7 +1810,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1807,7 +1824,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; rcu_assign_pointer(inet->mc_list, iml); @@ -1821,17 +1838,14 @@ EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { - struct ip_sf_socklist *psf; - - psf = container_of(rp, struct ip_sf_socklist, rcu); + kfree(container_of(rp, struct ip_sf_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(psf); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { - struct ip_sf_socklist *psf = iml->sflist; + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (psf == NULL) { @@ -1851,11 +1865,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, static void ip_mc_socklist_reclaim(struct rcu_head *rp) { - struct ip_mc_socklist *iml; - - iml = container_of(rp, struct ip_mc_socklist, rcu); + kfree(container_of(rp, struct ip_mc_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(iml); } @@ -1866,7 +1877,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp) int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; @@ -1876,7 +1888,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { @@ -1888,7 +1902,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - rcu_assign_pointer(*imlp, iml->next); + *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); @@ -1934,7 +1948,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) @@ -1958,7 +1972,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -2077,7 +2091,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2107,7 +2121,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -2155,7 +2169,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2163,7 +2177,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); if (!psl) { len = 0; @@ -2208,7 +2222,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2216,7 +2230,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; @@ -2257,7 +2271,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) goto out; rcu_read_lock(); - for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; @@ -2265,7 +2279,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) ret = inet->mc_all; if (!pmc) goto unlock; - psl = pmc->sflist; + psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; @@ -2300,10 +2314,10 @@ void ip_mc_drop_socket(struct sock *sk) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - rcu_assign_pointer(inet->mc_list, iml->next); + inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) @@ -2321,8 +2335,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } @@ -2343,7 +2357,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return rv; } @@ -2369,13 +2383,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); } return im; } @@ -2383,11 +2395,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; - while (!im) { - if (likely(state->in_dev != NULL)) - read_unlock(&state->in_dev->mc_list_lock); + im = rcu_dereference(im->next_rcu); + while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; @@ -2396,8 +2406,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2433,10 +2442,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } @@ -2458,7 +2465,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } @@ -2517,8 +2524,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2529,7 +2535,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); } return psf; } @@ -2543,9 +2548,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) - read_unlock(&state->idev->mc_list_lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2554,8 +2556,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2601,10 +2602,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7174370b1195..06f5f8f482f0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,17 +358,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options *opt = inet_rsk(req)->opt; struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet_sk(sk)->inet_sport, - .dport = ireq->rmt_port } } }; + .fl_ip_sport = inet_sk(sk)->inet_sport, + .fl_ip_dport = ireq->rmt_port }; struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9e94d7cf4f8a..d9bc85751c74 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -63,7 +63,7 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * dtime: unused node list lock - * v4daddr: unchangeable + * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -static struct { +struct inet_peer_base { struct inet_peer __rcu *root; spinlock_t lock; int total; -} peers = { +}; + +static struct inet_peer_base v4_peers = { + .root = peer_avl_empty_rcu, + .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), + .total = 0, +}; + +static struct inet_peer_base v6_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(peers.lock), + .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), .total = 0, }; + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -152,28 +161,45 @@ static void unlink_from_unused(struct inet_peer *p) } } +static int addr_compare(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) +{ + int i, n = (a->family == AF_INET ? 1 : 4); + + for (i = 0; i < n; i++) { + if (a->a6[i] == b->a6[i]) + continue; + if (a->a6[i] < b->a6[i]) + return -1; + return 1; + } + + return 0; +} + /* * Called with local BH disabled and the pool lock held. */ -#define lookup(_daddr, _stack) \ +#define lookup(_daddr, _stack, _base) \ ({ \ struct inet_peer *u; \ struct inet_peer __rcu **v; \ \ stackptr = _stack; \ - *stackptr++ = &peers.root; \ - for (u = rcu_dereference_protected(peers.root, \ - lockdep_is_held(&peers.lock)); \ + *stackptr++ = &_base->root; \ + for (u = rcu_dereference_protected(_base->root, \ + lockdep_is_held(&_base->lock)); \ u != peer_avl_empty; ) { \ - if (_daddr == u->v4daddr) \ + int cmp = addr_compare(_daddr, &u->daddr); \ + if (cmp == 0) \ break; \ - if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ + if (cmp == -1) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&_base->lock)); \ } \ u; \ }) @@ -185,13 +211,15 @@ static void unlink_from_unused(struct inet_peer *p) * But every pointer we follow is guaranteed to be valid thanks to RCU. * We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(__be32 daddr) +static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, + struct inet_peer_base *base) { - struct inet_peer *u = rcu_dereference_bh(peers.root); + struct inet_peer *u = rcu_dereference_bh(base->root); int count = 0; while (u != peer_avl_empty) { - if (daddr == u->v4daddr) { + int cmp = addr_compare(daddr, &u->daddr); + if (cmp == 0) { /* Before taking a reference, check if this entry was * deleted, unlink_from_pool() sets refcnt=-1 to make * distinction between an unused entry (refcnt=0) and @@ -201,7 +229,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) u = NULL; return u; } - if ((__force __u32)daddr < (__force __u32)u->v4daddr) + if (cmp == -1) u = rcu_dereference_bh(u->avl_left); else u = rcu_dereference_bh(u->avl_right); @@ -212,19 +240,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) } /* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start) \ +#define lookup_rightempty(start, base) \ ({ \ struct inet_peer *u; \ struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ for (u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&base->lock)); \ u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_dereference_protected(*v, \ - lockdep_is_held(&peers.lock)); \ + lockdep_is_held(&base->lock)); \ } \ u; \ }) @@ -234,7 +262,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) * Look into mm/map_avl.c for more detail description of the ideas. */ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend) + struct inet_peer __rcu ***stackend, + struct inet_peer_base *base) { struct inet_peer __rcu **nodep; struct inet_peer *node, *l, *r; @@ -243,20 +272,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], while (stackend > stack) { nodep = *--stackend; node = rcu_dereference_protected(*nodep, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); l = rcu_dereference_protected(node->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); r = rcu_dereference_protected(node->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; ll = rcu_dereference_protected(l->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lr = rcu_dereference_protected(l->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); lrh = node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ @@ -268,9 +297,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ lrl = rcu_dereference_protected(lr->avl_left, - lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ + lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ lrr = rcu_dereference_protected(lr->avl_right, - lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ + lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ @@ -286,9 +315,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], struct inet_peer *rr, *rl, *rlr, *rll; int rlh; rr = rcu_dereference_protected(r->avl_right, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); rl = rcu_dereference_protected(r->avl_left, - lockdep_is_held(&peers.lock)); + lockdep_is_held(&base->lock)); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ @@ -300,9 +329,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ rlr = rcu_dereference_protected(rl->avl_right, - lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ + lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ rll = rcu_dereference_protected(rl->avl_left, - lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ + lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = lh + 1; /* node: LH+1 */ @@ -321,14 +350,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], } /* Called with local BH disabled and the pool lock held. */ -#define link_to_pool(n) \ +#define link_to_pool(n, base) \ do { \ n->avl_height = 1; \ n->avl_left = peer_avl_empty_rcu; \ n->avl_right = peer_avl_empty_rcu; \ /* lockless readers can catch us now */ \ rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr); \ + peer_avl_rebalance(stack, stackptr, base); \ } while (0) static void inetpeer_free_rcu(struct rcu_head *head) @@ -337,13 +366,13 @@ static void inetpeer_free_rcu(struct rcu_head *head) } /* May be called with local BH enabled. */ -static void unlink_from_pool(struct inet_peer *p) +static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) { int do_free; do_free = 0; - spin_lock_bh(&peers.lock); + spin_lock_bh(&base->lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. If we can * atomically (because of lockless readers) take this last reference, @@ -353,7 +382,7 @@ static void unlink_from_pool(struct inet_peer *p) if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { struct inet_peer __rcu **stack[PEER_MAXDEPTH]; struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(p->v4daddr, stack) != p) + if (lookup(&p->daddr, stack, base) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ if (p->avl_left == peer_avl_empty_rcu) { @@ -362,11 +391,11 @@ static void unlink_from_pool(struct inet_peer *p) } else { /* look for a node to insert instead of p */ struct inet_peer *t; - t = lookup_rightempty(p); + t = lookup_rightempty(p, base); BUG_ON(rcu_dereference_protected(*stackptr[-1], - lockdep_is_held(&peers.lock)) != t); + lockdep_is_held(&base->lock)) != t); **--stackptr = t->avl_left; - /* t is removed, t->v4daddr > x->v4daddr for any + /* t is removed, t->daddr > x->daddr for any * x in p->avl_left subtree. * Put t in the old place of p. */ RCU_INIT_POINTER(*delp[0], t); @@ -376,11 +405,11 @@ static void unlink_from_pool(struct inet_peer *p) BUG_ON(delp[1] != &p->avl_left); delp[1] = &t->avl_left; /* was &p->avl_left */ } - peer_avl_rebalance(stack, stackptr); - peers.total--; + peer_avl_rebalance(stack, stackptr, base); + base->total--; do_free = 1; } - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); if (do_free) call_rcu_bh(&p->rcu, inetpeer_free_rcu); @@ -395,6 +424,16 @@ static void unlink_from_pool(struct inet_peer *p) inet_putpeer(p); } +static struct inet_peer_base *family_to_base(int family) +{ + return (family == AF_INET ? &v4_peers : &v6_peers); +} + +static struct inet_peer_base *peer_to_base(struct inet_peer *p) +{ + return family_to_base(p->daddr.family); +} + /* May be called with local BH enabled. */ static int cleanup_once(unsigned long ttl) { @@ -428,21 +467,22 @@ static int cleanup_once(unsigned long ttl) * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p); + unlink_from_pool(p, peer_to_base(p)); return 0; } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__be32 daddr, int create) +struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { - struct inet_peer *p; struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; + struct inet_peer_base *base = family_to_base(AF_INET); + struct inet_peer *p; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock_bh(); - p = lookup_rcu_bh(daddr); + p = lookup_rcu_bh(daddr, base); rcu_read_unlock_bh(); if (p) { @@ -456,50 +496,57 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ - spin_lock_bh(&peers.lock); - p = lookup(daddr, stack); + spin_lock_bh(&base->lock); + p = lookup(daddr, stack, base); if (p != peer_avl_empty) { atomic_inc(&p->refcnt); - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { - p->v4daddr = daddr; + p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; INIT_LIST_HEAD(&p->unused); /* Link the node. */ - link_to_pool(p); - peers.total++; + link_to_pool(p, base); + base->total++; } - spin_unlock_bh(&peers.lock); + spin_unlock_bh(&base->lock); - if (peers.total >= inet_peer_threshold) + if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); return p; } +static int compute_total(void) +{ + return v4_peers.total + v6_peers.total; +} +EXPORT_SYMBOL_GPL(inet_getpeer); + /* Called with local BH disabled. */ static void peer_check_expire(unsigned long dummy) { unsigned long now = jiffies; - int ttl; + int ttl, total; - if (peers.total >= inet_peer_threshold) + total = compute_total(); + if (total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peers.total / inet_peer_threshold * HZ; + total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -508,13 +555,14 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - if (peers.total >= inet_peer_threshold) + total = compute_total(); + if (total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peers.total / inet_peer_threshold * HZ; + total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } @@ -530,3 +578,4 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } +EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 168440834ade..e6215bdd96c0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 70ff77f02eee..258c98d5fa79 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -405,11 +405,11 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); else - sprintf(name, "gre%%d"); + strcpy(name, "gre%d"); dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); if (!dev) - return NULL; + return NULL; dev_net_set(dev, net); @@ -634,7 +634,7 @@ static int ipgre_rcv(struct sk_buff *skb) #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ - if (skb_rtable(skb)->fl.iif == 0) + if (rt_is_output_route(skb_rtable(skb))) goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; @@ -772,16 +772,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, - .proto = IPPROTO_GRE - } -; + .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), + .fl_gre_key = tunnel->parms.o_key + }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { dev->stats.tx_carrier_errors++; goto tx_error; @@ -951,14 +946,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, - .proto = IPPROTO_GRE + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), + .proto = IPPROTO_GRE, + .fl_gre_key = tunnel->parms.o_key }; struct rtable *rt; @@ -1216,14 +1208,11 @@ static int ipgre_open(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, - .nl_u = { - .ip4_u = { - .daddr = t->parms.iph.daddr, - .saddr = t->parms.iph.saddr, - .tos = RT_TOS(t->parms.iph.tos) - } - }, - .proto = IPPROTO_GRE + .fl4_dst = t->parms.iph.daddr, + .fl4_src = t->parms.iph.saddr, + .fl4_tos = RT_TOS(t->parms.iph.tos), + .proto = IPPROTO_GRE, + .fl_gre_key = t->parms.o_key }; struct rtable *rt; @@ -1775,3 +1764,4 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); +MODULE_ALIAS("gre0"); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 439d2a34ee44..5090c7ff525e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -341,15 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb) { struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -1404,14 +1402,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { struct flowi fl = { .oif = arg->bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, - /* Not quite clean, but right. */ - .uli_u = { .ports = - { .sport = tcp_hdr(skb)->dest, - .dport = tcp_hdr(skb)->source } }, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, .proto = sk->sk_protocol, .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 3a6e1ec5e9ae..2b097752426b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1191,13 +1191,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - printk(","); + printk(KERN_CONT ","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - printk(" OK\n"); + printk(KERN_CONT " OK\n"); break; } @@ -1205,7 +1205,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - printk(" timed out!\n"); + printk(KERN_CONT " timed out!\n"); break; } @@ -1215,7 +1215,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - printk("."); + printk(KERN_CONT "."); } #ifdef IPCONFIG_BOOTP @@ -1236,7 +1236,7 @@ static int __init ic_dynamic(void) ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), &ic_servaddr); - printk("my address is %pI4\n", &ic_myaddr); + printk(KERN_CONT "my address is %pI4\n", &ic_myaddr); return 0; } @@ -1468,19 +1468,19 @@ static int __init ip_auto_config(void) /* * Clue in the operator. */ - printk("IP-Config: Complete:"); - printk("\n device=%s", ic_dev->name); - printk(", addr=%pI4", &ic_myaddr); - printk(", mask=%pI4", &ic_netmask); - printk(", gw=%pI4", &ic_gateway); - printk(",\n host=%s, domain=%s, nis-domain=%s", + printk("IP-Config: Complete:\n"); + printk(" device=%s", ic_dev->name); + printk(KERN_CONT ", addr=%pI4", &ic_myaddr); + printk(KERN_CONT ", mask=%pI4", &ic_netmask); + printk(KERN_CONT ", gw=%pI4", &ic_gateway); + printk(KERN_CONT ",\n host=%s, domain=%s, nis-domain=%s", utsname()->nodename, ic_domain, utsname()->domainname); - printk(",\n bootserver=%pI4", &ic_servaddr); - printk(", rootserver=%pI4", &root_server_addr); - printk(", rootpath=%s", root_server_path); + printk(KERN_CONT ",\n bootserver=%pI4", &ic_servaddr); + printk(KERN_CONT ", rootserver=%pI4", &root_server_addr); + printk(KERN_CONT ", rootpath=%s", root_server_path); if (ic_dev_mtu) - printk(", mtu=%d", ic_dev_mtu); - printk("\n"); + printk(KERN_CONT ", mtu=%d", ic_dev_mtu); + printk(KERN_CONT "\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cd300aaee78f..988f52fba54a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = dst, + .fl4_src= tiph->saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_IPIP }; @@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; struct rtable *rt; @@ -921,3 +913,4 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 86dd5691af46..3f3a9afd73e0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (vif->flags & VIFF_TUNNEL) { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = vif->remote, - .saddr = vif->local, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = vif->remote, + .fl4_src = vif->local, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; @@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, } else { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; @@ -1654,7 +1646,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, if (mrt->vif_table[vif].dev != skb->dev) { int true_vifi; - if (skb_rtable(skb)->fl.iif == 0) { + if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... * diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index d88a46c54fd1..994a1f29ebbc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; + fl.fl4_dst = iph->daddr; if (type == RTN_LOCAL) - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.fl4_src = iph->saddr; + fl.fl4_tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; @@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; + fl.fl4_dst = iph->saddr; if (ip_route_output_key(net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 48111594ee9b..19eb59d01037 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -3,15 +3,15 @@ # # objects for l3 independent conntrack -nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) ifeq ($(CONFIG_PROC_FS),y) nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o endif endif -nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o -iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o +nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o +iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1f85ef289895..a3d5ab786e81 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 987bf9adb318..3843c2dfde82 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -140,13 +140,15 @@ static unsigned long expires_ljiffies; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static void ipv4_dst_destroy(struct dst_entry *dst); -static void ipv4_dst_ifdown(struct dst_entry *dst, - struct net_device *dev, int how); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); +static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int how) +{ +} static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -621,7 +623,7 @@ static inline int rt_fast_clean(struct rtable *rth) /* Kill broadcast/multicast entries very aggresively, if they collide in hash table with more useful entries */ return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rth->fl.iif && rth->dst.rt_next; + rt_is_input_route(rth) && rth->dst.rt_next; } static inline int rt_valuable(struct rtable *rth) @@ -666,7 +668,7 @@ static inline u32 rt_score(struct rtable *rt) if (rt_valuable(rt)) score |= (1<<31); - if (!rt->fl.iif || + if (rt_is_output_route(rt) || !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) score |= (1<<30); @@ -682,17 +684,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -1124,7 +1126,7 @@ restart: */ rt->dst.flags |= DST_NOCACHE; - if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { int err = arp_bind_neighbour(&rt->dst); if (err) { if (net_ratelimit()) @@ -1222,7 +1224,7 @@ restart: /* Try to bind route to arp only if it is output route or unicast forwarding path. */ - if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { int err = arp_bind_neighbour(&rt->dst); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1287,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create) { struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer_v4(rt->rt_dst, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1404,7 +1406,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || - rth->fl.iif != 0 || + rt_is_input_route(rth) || rt_is_expired(rth) || !net_eq(dev_net(rth->dst.dev), net)) { rthp = &rth->dst.rt_next; @@ -1433,8 +1435,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->dst.child = NULL; if (rt->dst.dev) dev_hold(rt->dst.dev); - if (rt->idev) - in_dev_hold(rt->idev); rt->dst.obsolete = -1; rt->dst.lastuse = jiffies; rt->dst.path = &rt->dst; @@ -1666,7 +1666,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_dst != daddr || rth->rt_src != iph->saddr || rth->fl.oif != ikeys[k] || - rth->fl.iif != 0 || + rt_is_input_route(rth) || dst_metric_locked(&rth->dst, RTAX_MTU) || !net_eq(dev_net(rth->dst.dev), net) || rt_is_expired(rth)) @@ -1728,33 +1728,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; - struct in_device *idev = rt->idev; if (peer) { rt->peer = NULL; inet_putpeer(peer); } - - if (idev) { - rt->idev = NULL; - in_dev_put(idev); - } } -static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, - int how) -{ - struct rtable *rt = (struct rtable *) dst; - struct in_device *idev = rt->idev; - if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = - in_dev_get(dev_net(dev)->loopback_dev); - if (loopback_idev) { - rt->idev = loopback_idev; - in_dev_put(idev); - } - } -} static void ipv4_link_failure(struct sk_buff *skb) { @@ -1790,7 +1770,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) __be32 src; struct fib_result res; - if (rt->fl.iif == 0) + if (rt_is_output_route(rt)) src = rt->rt_src; else { rcu_read_lock(); @@ -1910,7 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); - rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -2050,7 +2029,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->fl.iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); - rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; @@ -2111,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, + .fl4_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = dev->ifindex }; unsigned flags = 0; @@ -2231,7 +2207,6 @@ local_input: rth->fl.iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); - rth->idev = in_dev_get(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->dst.input= ip_local_deliver; @@ -2417,9 +2392,6 @@ static int __mkroute_output(struct rtable **result, if (!rth) return -ENOBUFS; - in_dev_hold(in_dev); - rth->idev = in_dev; - atomic_set(&rth->dst.__refcnt, 1); rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOXFRM)) @@ -2506,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = oldflp->fl4_dst, - .saddr = oldflp->fl4_src, - .tos = tos & IPTOS_RT_MASK, - .scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : - RT_SCOPE_UNIVERSE), - } }, + struct flowi fl = { .fl4_dst = oldflp->fl4_dst, + .fl4_src = oldflp->fl4_src, + .fl4_tos = tos & IPTOS_RT_MASK, + .fl4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), .mark = oldflp->mark, .iif = net->loopback_dev->ifindex, .oif = oldflp->oif }; @@ -2695,7 +2664,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth = rcu_dereference_bh(rth->dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && - rth->fl.iif == 0 && + rt_is_output_route(rth) && rth->fl.oif == flp->oif && rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & @@ -2759,9 +2728,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi rt->fl = ort->fl; - rt->idev = ort->idev; - if (rt->idev) - in_dev_hold(rt->idev); rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; @@ -2853,7 +2819,7 @@ static int rt_fill_info(struct net *net, if (rt->dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif - if (rt->fl.iif) + if (rt_is_input_route(rt)) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); @@ -2878,7 +2844,7 @@ static int rt_fill_info(struct net *net, } } - if (rt->fl.iif) { + if (rt_is_input_route(rt)) { #ifdef CONFIG_IP_MROUTE __be32 dst = rt->rt_dst; @@ -2973,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = -rt->dst.error; } else { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos, - }, - }, + .fl4_dst = dst, + .fl4_src = src, + .fl4_tos = rtm->rtm_tos, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .mark = mark, }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650cace2180d..47519205a014 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, */ { struct flowi fl = { .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = IPPROTO_TCP, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = th->dest, - .dport = th->source } } }; + .fl_ip_sport = th->dest, + .fl_ip_dport = th->source }; security_req_classify_flow(req, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f15c36a706ec..6c11eece262c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1193,7 +1193,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), - KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", + "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); #endif @@ -1477,10 +1477,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * shouldn't happen. */ if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), - KERN_INFO "recvmsg bug: copied %X " - "seq %X rcvnxt %X fl %X\n", *seq, - TCP_SKB_CB(skb)->seq, tp->rcv_nxt, - flags)) + "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, + flags)) break; offset = *seq - TCP_SKB_CB(skb)->seq; @@ -1490,10 +1489,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " - "copied %X seq %X rcvnxt %X fl %X\n", - *seq, TCP_SKB_CB(skb)->seq, - tp->rcv_nxt, flags); + WARN(!(flags & MSG_PEEK), + "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e13da6de1fc7..4fc3387aa994 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1210,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif -static struct timewait_sock_ops tcp_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; @@ -1347,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->v4daddr == saddr) { + peer->daddr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > @@ -1763,64 +1757,40 @@ do_time_wait: goto discard_it; } -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. - */ - -int tcp_v4_remember_stamp(struct sock *sk) +struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { + struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_get(sk); - struct inet_peer *peer = NULL; - int release_it = 0; + struct inet_peer *peer; if (!rt || rt->rt_dst != inet->inet_daddr) { - peer = inet_getpeer(inet->inet_daddr, 1); - release_it = 1; + peer = inet_getpeer_v4(inet->inet_daddr, 1); + *release_it = true; } else { if (!rt->peer) rt_bind_peer(rt, 1); peer = rt->peer; + *release_it = false; } - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - if (release_it) - inet_putpeer(peer); - return 1; - } - - return 0; + return peer; } -EXPORT_SYMBOL(tcp_v4_remember_stamp); +EXPORT_SYMBOL(tcp_v4_get_peer); -int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) +void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); - - if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - inet_putpeer(peer); - return 1; - } + struct inet_timewait_sock *tw = inet_twsk(sk); - return 0; + return inet_getpeer_v4(tw->tw_daddr, 1); } +EXPORT_SYMBOL(tcp_v4_tw_get_peer); + +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v4_tw_get_peer, +}; const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -1828,7 +1798,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a66735f75963..80b1f80759ab 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -49,6 +49,56 @@ struct inet_timewait_death_row tcp_death_row = { }; EXPORT_SYMBOL_GPL(tcp_death_row); +/* VJ's idea. Save last timestamp seen from this destination + * and hold it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter synchronized + * state. + */ + +static int tcp_remember_stamp(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_peer *peer; + bool release_it; + + peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + if (peer) { + if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; + peer->tcp_ts = tp->rx_opt.ts_recent; + } + if (release_it) + inet_putpeer(peer); + return 1; + } + + return 0; +} + +static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) +{ + struct sock *sk = (struct sock *) tw; + struct inet_peer *peer; + + peer = twsk_getpeer(sk); + if (peer) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + + if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; + } + inet_putpeer(peer); + return 1; + } + return 0; +} + static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -149,14 +199,9 @@ kill_with_rst: tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - /* I am shamed, but failed to make it more elegant. - * Yes, it is direct reference to IP, which is impossible - * to generalize to IPv6. Taking into account that IPv6 - * do not understand recycling in any case, it not - * a big problem in practice. --ANK */ - if (tw->tw_family == AF_INET && - tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && - tcp_v4_tw_remember_stamp(tw)) + if (tcp_death_row.sysctl_tw_recycle && + tcptw->tw_ts_recent_stamp && + tcp_tw_remember_stamp(tw)) inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, TCP_TIMEWAIT_LEN); else @@ -274,7 +319,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); + recycle_ok = tcp_remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 61c2463e2753..97041f24cd27 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; int sysctl_tcp_tso_win_divisor __read_mostly = 3; int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = 512; +int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -824,8 +824,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, &md5); tcp_header_size = tcp_options_size + sizeof(struct tcphdr); - if (tcp_packets_in_flight(tp) == 0) + if (tcp_packets_in_flight(tp) == 0) { tcp_ca_event(sk, CA_EVENT_TX_START); + skb->ooo_okay = 1; + } else + skb->ooo_okay = 0; skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2596,6 +2599,7 @@ int tcp_connect(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; + int err; tcp_connect_init(sk); @@ -2618,7 +2622,9 @@ int tcp_connect(struct sock *sk) sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); - tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + if (err == -ECONNREFUSED) + return err; /* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 6211e2114173..85ee7eb7e38e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n) struct timespec tv = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); - return snprintf(tbuf, n, + return scnprintf(tbuf, n, "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, @@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, return -EINVAL; while (cnt < len) { - char tbuf[128]; + char tbuf[164]; int width; /* Wait for data in buffer */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5e0a3a582a59..b37181da487c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -430,7 +430,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -500,7 +500,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { @@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rt == NULL) { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = faddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = dport }; struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4464f3bff6a7..b057d40addec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -11,6 +11,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> +#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> @@ -22,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, xfrm_address_t *daddr) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .tos = tos, - .daddr = daddr->a4, - }, - }, + .fl4_dst = daddr->a4, + .fl4_tos = tos, }; struct dst_entry *dst; struct rtable *rt; @@ -80,10 +77,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt.idev = in_dev_get(dev); - if (!xdst->u.rt.idev) - return -ENODEV; - xdst->u.rt.peer = rt->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); @@ -158,6 +151,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; + + case IPPROTO_GRE: + if (pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags = (__be16 *)xprth; + __be32 *gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl->fl_gre_key = gre_hdr[1]; + } + } + break; + default: fl->fl_ipsec_spi = 0; break; @@ -189,8 +196,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (likely(xdst->u.rt.idev)) - in_dev_put(xdst->u.rt.idev); if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); @@ -199,27 +204,9 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int unregister) { - struct xfrm_dst *xdst; - if (!unregister) return; - xdst = (struct xfrm_dst *)dst; - if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = - in_dev_get(dev_net(dev)->loopback_dev); - BUG_ON(!loopback_idev); - - do { - in_dev_put(xdst->u.rt.idev); - xdst->u.rt.idev = loopback_idev; - in_dev_hold(loopback_idev); - xdst = (struct xfrm_dst *)xdst->u.dst.child; - } while (xdst->u.dst.xfrm); - - __in_dev_put(loopback_idev); - } - xfrm_dst_ifdown(dst, dev); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 23cc8e1ce8d4..1023ad0d2b15 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3836,6 +3836,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; } +static inline size_t inet6_ifla6_size(void) +{ + return nla_total_size(4) /* IFLA_INET6_FLAGS */ + + nla_total_size(sizeof(struct ifla_cacheinfo)) + + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ + + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ +} + static inline size_t inet6_if_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -3843,13 +3852,7 @@ static inline size_t inet6_if_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size( /* IFLA_PROTINFO */ - nla_total_size(4) /* IFLA_INET6_FLAGS */ - + nla_total_size(sizeof(struct ifla_cacheinfo)) - + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ - + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ - + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ - ); + + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ } static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, @@ -3896,15 +3899,70 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } +static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) +{ + struct nlattr *nla; + struct ifla_cacheinfo ci; + + NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); + + ci.max_reasm_len = IPV6_MAXPLEN; + ci.tstamp = cstamp_delta(idev->tstamp); + ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); + ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); + NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); + + nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); + if (nla == NULL) + goto nla_put_failure; + ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); + + /* XXX - MC not implemented */ + + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); + + nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); + if (nla == NULL) + goto nla_put_failure; + snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static size_t inet6_get_link_af_size(const struct net_device *dev) +{ + if (!__in6_dev_get(dev)) + return 0; + + return inet6_ifla6_size(); +} + +static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +{ + struct inet6_dev *idev = __in6_dev_get(dev); + + if (!idev) + return -ENODATA; + + if (inet6_fill_ifla6_attrs(skb, idev) < 0) + return -EMSGSIZE; + + return 0; +} + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; - struct nlattr *nla; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - struct ifla_cacheinfo ci; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) @@ -3931,30 +3989,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, if (protoinfo == NULL) goto nla_put_failure; - NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); - - ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = cstamp_delta(idev->tstamp); - ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); - ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); - NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); - - nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) - goto nla_put_failure; - ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); - - /* XXX - MC not implemented */ - - nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) - goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); - - nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (inet6_fill_ifla6_attrs(skb, idev) < 0) goto nla_put_failure; - snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla_nest_end(skb, protoinfo); return nlmsg_end(skb, nlh); @@ -4625,6 +4661,12 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_inet6addr_notifier); +static struct rtnl_af_ops inet6_ops = { + .family = AF_INET6, + .fill_link_af = inet6_fill_link_af, + .get_link_af_size = inet6_get_link_af_size, +}; + /* * Init / cleanup code */ @@ -4676,6 +4718,10 @@ int __init addrconf_init(void) addrconf_verify(0); + err = rtnl_af_register(&inet6_ops); + if (err < 0) + goto errout_af; + err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); if (err < 0) goto errout; @@ -4691,6 +4737,8 @@ int __init addrconf_init(void) return 0; errout: + rtnl_af_unregister(&inet6_ops); +errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); @@ -4711,6 +4759,8 @@ void addrconf_cleanup(void) rtnl_lock(); + __rtnl_af_unregister(&inet6_ops); + /* clean dev list */ for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a1628023bd1..e46305d1815a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,24 +54,54 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); +struct dst_entry *inet6_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct inet6_request_sock *treq = inet6_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *final_p, final; + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + final_p = fl6_update_dst(&fl, np->opt, &final); + ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, &fl); + + if (ip6_dst_lookup(sk, &dst, &fl)) + return NULL; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + return NULL; + + return dst; +} + /* * request_sock (formerly open request) hash tables. */ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - u32 a = (__force u32)raddr->s6_addr32[0]; - u32 b = (__force u32)raddr->s6_addr32[1]; - u32 c = (__force u32)raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)raddr->s6_addr32[3]; - b += (__force u32)rport; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)raddr->s6_addr32[0], + (__force u32)raddr->s6_addr32[1], + (__force u32)raddr->s6_addr32[2], + rnd); + + c = jhash_2words((__force u32)raddr->s6_addr32[3], + (__force u32)rport, + c); return c & (synq_hsize - 1); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 70e891a20fb9..4f4483e697bd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); -#define IPV6_TLV_TEL_DST_SIZE 8 - #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) #else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6f32ffce7022..9fab274019c0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, fl = (struct flowi) { .oif = vif->link, - .nl_u = { .ip6_u = - { .daddr = ipv6h->daddr, } - } + .fl6_dst = ipv6h->daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d1444b95ad7e..49f986d626a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -257,7 +271,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return NULL; idev = __in6_dev_get(dev); if (!idev) - return NULL;; + return NULL; read_lock_bh(&idev->lock); if (idev->dead) { read_unlock_bh(&idev->lock); @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 998d6d27e7cf..e18f84130203 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -141,18 +141,18 @@ struct neigh_table nd_tbl = { .proxy_redo = pndisc_redo, .id = "ndisc_cache", .parms = { - .tbl = &nd_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len = 3, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, + .tbl = &nd_tbl, + .base_reachable_time = ND_REACHABLE_TIME, + .retrans_time = ND_RETRANS_TIMER, + .gc_staletime = 60 * HZ, + .reachable_time = ND_REACHABLE_TIME, + .delay_probe_time = 5 * HZ, + .queue_len = 3, + .ucast_probes = 3, + .mcast_probes = 3, + .anycast_delay = 1 * HZ, + .proxy_delay = (8 * HZ) / 10, + .proxy_qlen = 64, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 7155b2451d7c..35915e8617f0 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct flowi fl = { .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, }; dst = ip6_route_output(net, skb->sk, &fl); diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0a432c9b0795..abfee91ce816 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack -nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o +nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o # defrag -nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o +nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # matches diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0f2766453759..07beeb06f752 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, const struct in6_addr *daddr, u32 rnd) { - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); + u32 c; + + c = jhash_3words((__force u32)saddr->s6_addr32[0], + (__force u32)saddr->s6_addr32[1], + (__force u32)saddr->s6_addr32[2], + rnd); + + c = jhash_3words((__force u32)saddr->s6_addr32[3], + (__force u32)daddr->s6_addr32[0], + (__force u32)daddr->s6_addr32[1], + c); + + c = jhash_3words((__force u32)daddr->s6_addr32[2], + (__force u32)daddr->s6_addr32[3], + (__force u32)id, + c); return c & (INETFRAGS_HASHSZ - 1); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96455ffb76fb..026caef0326c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -188,11 +188,29 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct inet_peer *peer = rt->rt6i_peer; if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); } + if (peer) { + BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); + rt->rt6i_peer = NULL; + inet_putpeer(peer); + } +} + +void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer *peer; + + if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) + return; + + peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + inet_putpeer(peer); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -558,11 +576,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, { struct flowi fl = { .oif = oif, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; @@ -778,13 +792,9 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - }, - }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, + .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, .mark = skb->mark, .proto = iph->nexthdr, }; @@ -1463,12 +1473,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = *dest, - .saddr = *src, - }, - }, + .fl6_dst = *dest, + .fl6_src = *src, }, }; @@ -2465,8 +2471,6 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) - struct rt6_proc_arg { char *buffer; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8c4d00c7cd2b..8ce38f10a547 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -731,10 +731,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) } }, + struct flowi fl = { .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { @@ -856,10 +855,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, + struct flowi fl = { .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7e41e2cbb85e..319458558df9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct rt6_info *rt; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); + rt = (struct rt6_info *) dst; + if (tcp_death_row.sysctl_tw_recycle && + !tp->rx_opt.ts_recent_stamp && + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { + struct inet_peer *peer = rt6_get_peer(rt); + /* + * VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state + * TIME-WAIT * and initialize rx_opt.ts_recent from it, + * when trying new connection. + */ + if (peer) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } + } + } + icsk->icsk_ext_hdr_len = 0; if (np->opt) icsk->icsk_ext_hdr_len = (np->opt->opt_flen + @@ -906,12 +927,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static struct timewait_sock_ops tcp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - static void __tcp_v6_send_check(struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -1176,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; + struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1273,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, tcp_hdr(skb)); if (!isn) { + struct inet_peer *peer = NULL; + if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1285,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) treq->iif = inet6_iif(skb); - if (!want_cookie) { - isn = tcp_v6_init_sequence(skb); - } else { + + if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; + goto have_isn; } + + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && + tcp_death_row.sysctl_tw_recycle && + (dst = inet6_csk_route_req(sk, req)) != NULL && + (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && + ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + &treq->rmt_addr)) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && + (s32)(peer->tcp_ts - req->ts_recent) > + TCP_PAWS_WINDOW) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + (!peer || !peer->tcp_ts_stamp) && + (!dst || !dst_metric(dst, RTAX_RTT))) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", + &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + goto drop_and_release; + } + + isn = tcp_v6_init_sequence(skb); } +have_isn: tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); @@ -1304,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; +drop_and_release: + dst_release(dst); drop_and_free: reqsk_free(req); drop: @@ -1382,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (dst == NULL) { - struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + if (!dst) { + dst = inet6_csk_route_req(sk, req); + if (!dst) goto out; } @@ -1818,19 +1863,51 @@ do_time_wait: goto discard_it; } -static int tcp_v6_remember_stamp(struct sock *sk) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { - /* Alas, not yet... */ - return 0; + struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_peer *peer; + + if (!rt || + !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { + peer = inet_getpeer_v6(&np->daddr, 1); + *release_it = true; + } else { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + peer = rt->rt6i_peer; + *release_it = true; + } + + return peer; } +static void *tcp_v6_tw_get_peer(struct sock *sk) +{ + struct inet6_timewait_sock *tw6 = inet6_twsk(sk); + struct inet_timewait_sock *tw = inet_twsk(sk); + + if (tw->tw_family == AF_INET) + return tcp_v4_tw_get_peer(sk); + + return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); +} + +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v6_tw_get_peer, +}; + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v6_remember_stamp, + .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -1862,7 +1939,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93bec85..b541a4e009fb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -227,7 +227,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -294,7 +294,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, daddr, dport, dif) < badness)) { diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile index 48689458c086..ab23b5ba7e33 100644 --- a/net/irda/ircomm/Makefile +++ b/net/irda/ircomm/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o -ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o -ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o +ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o +ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile index 77549bc8641b..94eefbc8e6b9 100644 --- a/net/irda/irlan/Makefile +++ b/net/irda/irlan/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IRLAN) += irlan.o -irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o +irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile index b3ee01e0def3..61c365c8a2a0 100644 --- a/net/irda/irnet/Makefile +++ b/net/irda/irnet/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IRNET) += irnet.o -irnet-objs := irnet_ppp.o irnet_irda.o +irnet-y := irnet_ppp.o irnet_irda.o diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 522e219f3558..110efb704c9b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m { struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times diff --git a/net/lapb/Makefile b/net/lapb/Makefile index 53f7c90db163..fff797dfc88c 100644 --- a/net/lapb/Makefile +++ b/net/lapb/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_LAPB) += lapb.o -lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o +lapb-y := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index e35dbe55f520..dfd3a648a551 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -316,7 +316,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(addr->sllc_family != AF_LLC)) goto out; rc = -ENODEV; - rtnl_lock(); rcu_read_lock(); if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); @@ -334,10 +333,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) } } } else - llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, + llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); + if (llc->dev) + dev_hold(llc->dev); rcu_read_unlock(); - rtnl_unlock(); if (!llc->dev) goto out; if (!addr->sllc_sap) { diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index d2b03e0851ef..4bd6ef0be380 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -147,6 +147,5 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) void ieee80211_aes_key_free(struct crypto_cipher *tfm) { - if (tfm) - crypto_free_cipher(tfm); + crypto_free_cipher(tfm); } diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index b4d66cca76d6..d502b2684a66 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -128,6 +128,5 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]) void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) { - if (tfm) - crypto_free_cipher(tfm); + crypto_free_cipher(tfm); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18bd0e550600..0c544074479e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1299,6 +1299,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + err = drv_set_frag_threshold(local, wiphy->frag_threshold); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { err = drv_set_coverage_class(local, wiphy->coverage_class); @@ -1621,6 +1628,23 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, ieee80211_queue_work(&local->hw, &local->reconfig_filter); } +static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (local->started) + return -EOPNOTSUPP; + + return drv_set_antenna(local, tx_ant, rx_ant); +} + +static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return drv_get_antenna(local, tx_ant, rx_ant); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1673,4 +1697,6 @@ struct cfg80211_ops mac80211_config_ops = { .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .mgmt_frame_register = ieee80211_mgmt_frame_register, + .set_antenna = ieee80211_set_antenna, + .get_antenna = ieee80211_get_antenna, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 18260aa99c56..1f02e599a318 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -21,16 +21,30 @@ int mac80211_open_file_generic(struct inode *inode, struct file *file) return 0; } -#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...) \ +#define DEBUGFS_FORMAT_BUFFER_SIZE 100 + +int mac80211_format_buffer(char __user *userbuf, size_t count, + loff_t *ppos, char *fmt, ...) +{ + va_list args; + char buf[DEBUGFS_FORMAT_BUFFER_SIZE]; + int res; + + va_start(args, fmt); + res = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + return simple_read_from_buffer(userbuf, count, ppos, buf, res); +} + +#define DEBUGFS_READONLY_FILE(name, fmt, value...) \ static ssize_t name## _read(struct file *file, char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ struct ieee80211_local *local = file->private_data; \ - char buf[buflen]; \ - int res; \ \ - res = scnprintf(buf, buflen, fmt "\n", ##value); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + fmt "\n", ##value); \ } \ \ static const struct file_operations name## _ops = { \ @@ -46,13 +60,13 @@ static const struct file_operations name## _ops = { \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); -DEBUGFS_READONLY_FILE(frequency, 20, "%d", +DEBUGFS_READONLY_FILE(frequency, "%d", local->hw.conf.channel->center_freq); -DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", +DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", local->total_ps_buffered); -DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", +DEBUGFS_READONLY_FILE(wep_iv, "%#08x", local->wep_iv & 0xffffff); -DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", +DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); static ssize_t tsf_read(struct file *file, char __user *user_buf, @@ -60,13 +74,11 @@ static ssize_t tsf_read(struct file *file, char __user *user_buf, { struct ieee80211_local *local = file->private_data; u64 tsf; - char buf[100]; tsf = drv_get_tsf(local); - snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf); - - return simple_read_from_buffer(user_buf, count, ppos, buf, 19); + return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n", + (unsigned long long) tsf); } static ssize_t tsf_write(struct file *file, @@ -131,12 +143,9 @@ static ssize_t noack_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - res = scnprintf(buf, sizeof(buf), "%d\n", local->wifi_wme_noack_test); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "%d\n", + local->wifi_wme_noack_test); } static ssize_t noack_write(struct file *file, @@ -168,12 +177,8 @@ static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - - res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_queues); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n", + local->uapsd_queues); } static ssize_t uapsd_queues_write(struct file *file, @@ -215,12 +220,9 @@ static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - int res; - char buf[10]; - res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_max_sp_len); - - return simple_read_from_buffer(user_buf, count, ppos, buf, res); + return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n", + local->uapsd_max_sp_len); } static ssize_t uapsd_max_sp_len_write(struct file *file, diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 09cc9be34796..7c87529630f5 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -4,6 +4,8 @@ #ifdef CONFIG_MAC80211_DEBUGFS extern void debugfs_hw_add(struct ieee80211_local *local); extern int mac80211_open_file_generic(struct inode *inode, struct file *file); +extern int mac80211_format_buffer(char __user *userbuf, size_t count, + loff_t *ppos, char *fmt, ...); #else static inline void debugfs_hw_add(struct ieee80211_local *local) { diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 1243d1db5c59..5822a6ce7671 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -15,18 +15,17 @@ #include "debugfs.h" #include "debugfs_key.h" -#define KEY_READ(name, prop, buflen, format_string) \ +#define KEY_READ(name, prop, format_string) \ static ssize_t key_##name##_read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - char buf[buflen]; \ struct ieee80211_key *key = file->private_data; \ - int res = scnprintf(buf, buflen, format_string, key->prop); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + format_string, key->prop); \ } -#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") -#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") +#define KEY_READ_D(name) KEY_READ(name, name, "%d\n") +#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n") #define KEY_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -39,9 +38,9 @@ static const struct file_operations key_ ##name## _ops = { \ KEY_READ_##format(name) \ KEY_OPS(name) -#define KEY_CONF_READ(name, buflen, format_string) \ - KEY_READ(conf_##name, conf.name, buflen, format_string) -#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") +#define KEY_CONF_READ(name, format_string) \ + KEY_READ(conf_##name, conf.name, format_string) +#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n") #define KEY_CONF_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -59,7 +58,7 @@ KEY_CONF_FILE(keyidx, D); KEY_CONF_FILE(hw_key_idx, D); KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); -KEY_READ(ifindex, sdata->name, IFNAMSIZ + 2, "%s\n"); +KEY_READ(ifindex, sdata->name, "%s\n"); KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 4601fea1784d..f0fce37f4069 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -17,20 +17,18 @@ /* sta attributtes */ -#define STA_READ(name, buflen, field, format_string) \ +#define STA_READ(name, field, format_string) \ static ssize_t sta_ ##name## _read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - int res; \ struct sta_info *sta = file->private_data; \ - char buf[buflen]; \ - res = scnprintf(buf, buflen, format_string, sta->field); \ - return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ + return mac80211_format_buffer(userbuf, count, ppos, \ + format_string, sta->field); \ } -#define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") -#define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") +#define STA_READ_D(name, field) STA_READ(name, field, "%d\n") +#define STA_READ_U(name, field) STA_READ(name, field, "%u\n") +#define STA_READ_S(name, field) STA_READ(name, field, "%s\n") #define STA_OPS(name) \ static const struct file_operations sta_ ##name## _ops = { \ @@ -79,22 +77,18 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[20]; struct sta_info *sta = file->private_data; - int res = scnprintf(buf, sizeof(buf), "%u\n", - skb_queue_len(&sta->ps_tx_buf)); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return mac80211_format_buffer(userbuf, count, ppos, "%u\n", + skb_queue_len(&sta->ps_tx_buf)); } STA_OPS(num_ps_buf_frames); static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[20]; struct sta_info *sta = file->private_data; - int res = scnprintf(buf, sizeof(buf), "%d\n", - jiffies_to_msecs(jiffies - sta->last_rx)); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); + return mac80211_format_buffer(userbuf, count, ppos, "%d\n", + jiffies_to_msecs(jiffies - sta->last_rx)); } STA_OPS(inactive_ms); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 16983825f8e8..4244554d218a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -233,6 +233,20 @@ static inline void drv_get_tkip_seq(struct ieee80211_local *local, trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); } +static inline int drv_set_frag_threshold(struct ieee80211_local *local, + u32 value) +{ + int ret = 0; + + might_sleep(); + + trace_drv_set_frag_threshold(local, value); + if (local->ops->set_frag_threshold) + ret = local->ops->set_frag_threshold(&local->hw, value); + trace_drv_return_int(local, ret); + return ret; +} + static inline int drv_set_rts_threshold(struct ieee80211_local *local, u32 value) { @@ -428,4 +442,27 @@ static inline void drv_channel_switch(struct ieee80211_local *local, trace_drv_return_void(local); } + +static inline int drv_set_antenna(struct ieee80211_local *local, + u32 tx_ant, u32 rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->set_antenna) + ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_set_antenna(local, tx_ant, rx_ant, ret); + return ret; +} + +static inline int drv_get_antenna(struct ieee80211_local *local, + u32 *tx_ant, u32 *rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->get_antenna) + ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6831fb1641c8..c2772f23ac9c 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -531,6 +531,27 @@ TRACE_EVENT(drv_get_tkip_seq, ) ); +TRACE_EVENT(drv_set_frag_threshold, + TP_PROTO(struct ieee80211_local *local, u32 value), + + TP_ARGS(local, value), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, value) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value + ) +); + TRACE_EVENT(drv_set_rts_threshold, TP_PROTO(struct ieee80211_local *local, u32 value), @@ -862,6 +883,56 @@ TRACE_EVENT(drv_channel_switch, ) ); +TRACE_EVENT(drv_set_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + +TRACE_EVENT(drv_get_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 239c4836a946..410d104b1347 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -915,6 +915,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.basic_rates = params->basic_rates; + memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, + sizeof(params->mcast_rate)); sdata->vif.bss_conf.beacon_int = params->beacon_interval; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b80c38689927..5bc0745368fe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -349,6 +349,7 @@ struct ieee80211_if_managed { struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; + unsigned long beacon_timeout; unsigned long probe_timeout; int probe_send_count; @@ -1264,6 +1265,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, int powersave); void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); +void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr); void ieee80211_beacon_connection_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, @@ -1278,6 +1281,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); int ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); +int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, @@ -1287,6 +1293,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, u8 channel); +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ccd676b2f599..72df1ca7299b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -84,10 +84,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) goto out_unsupported; sdata = key->sdata; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* + * The driver doesn't know anything about VLAN interfaces. + * Hence, don't send GTKs for VLAN interfaces to the driver. + */ + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + goto out_unsupported; sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + } ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a3a9421555af..794807914940 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -28,13 +28,19 @@ #include "rate.h" #include "led.h" +#define IEEE80211_MAX_NULLFUNC_TRIES 2 #define IEEE80211_MAX_PROBE_TRIES 5 /* - * beacon loss detection timeout - * XXX: should depend on beacon interval + * Beacon loss timeout is calculated as N frames times the + * advertised beacon interval. This may need to be somewhat + * higher than what hardware might detect to account for + * delays in the host processing frames. But since we also + * probe on beacon miss before declaring the connection lost + * default to what we want. */ -#define IEEE80211_BEACON_LOSS_TIME (2 * HZ) +#define IEEE80211_BEACON_LOSS_COUNT 7 + /* * Time the connection can be idle before we probe * it to see if we can still talk to the AP. @@ -121,7 +127,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) return; mod_timer(&sdata->u.mgd.bcn_mon_timer, - round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); + round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout)); } void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) @@ -871,6 +877,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= ieee80211_handle_bss_capability(sdata, cbss->capability, bss->has_erp_value, bss->erp_value); + sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec( + IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int)); + sdata->u.mgd.associated = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); @@ -1026,6 +1035,51 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, ieee80211_sta_reset_conn_monitor(sdata); } +static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL))) + return; + + ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | + IEEE80211_STA_BEACON_POLL); + mutex_lock(&sdata->local->iflist_mtx); + ieee80211_recalc_ps(sdata->local, -1); + mutex_unlock(&sdata->local->iflist_mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) + return; + + /* + * We've received a probe response, but are not sure whether + * we have or will be receiving any beacons or data, so let's + * schedule the timers again, just in case. + */ + ieee80211_sta_reset_beacon_monitor(sdata); + + mod_timer(&ifmgd->conn_mon_timer, + round_jiffies_up(jiffies + + IEEE80211_CONNECTION_IDLE_TIME)); +} + +void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr) +{ + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_nullfunc(hdr->frame_control)) + return; + + ieee80211_sta_reset_conn_monitor(sdata); + + if (ieee80211_is_nullfunc(hdr->frame_control) && + sdata->u.mgd.probe_send_count > 0) { + sdata->u.mgd.probe_send_count = 0; + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } +} + static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1041,8 +1095,19 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ifmgd->probe_send_count >= unicast_limit) dst = NULL; - ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); - ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); + /* + * When the hardware reports an accurate Tx ACK status, it's + * better to send a nullfunc frame instead of a probe request, + * as it will kick us off the AP quickly if we aren't associated + * anymore. The timeout will be reset if the frame is ACKed by + * the AP. + */ + if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + ieee80211_send_nullfunc(sdata->local, sdata, 0); + else { + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); + } ifmgd->probe_send_count++; ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; @@ -1108,6 +1173,30 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); } +struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sk_buff *skb; + const u8 *ssid; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return NULL; + + ASSERT_MGD_MTX(ifmgd); + + if (!ifmgd->associated) + return NULL; + + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid, + ssid + 2, ssid[1], NULL, 0); + + return skb; +} +EXPORT_SYMBOL(ieee80211_ap_probereq_get); + static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1485,29 +1574,8 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); if (ifmgd->associated && - memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0 && - ifmgd->flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL)) { - ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | - IEEE80211_STA_BEACON_POLL); - mutex_lock(&sdata->local->iflist_mtx); - ieee80211_recalc_ps(sdata->local, -1); - mutex_unlock(&sdata->local->iflist_mtx); - - if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) - return; - - /* - * We've received a probe response, but are not sure whether - * we have or will be receiving any beacons or data, so let's - * schedule the timers again, just in case. - */ - ieee80211_sta_reset_beacon_monitor(sdata); - - mod_timer(&ifmgd->conn_mon_timer, - round_jiffies_up(jiffies + - IEEE80211_CONNECTION_IDLE_TIME)); - } + memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0) + ieee80211_reset_ap_probe(sdata); } /* @@ -1857,12 +1925,23 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL) && ifmgd->associated) { u8 bssid[ETH_ALEN]; + int max_tries; memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - if (time_is_after_jiffies(ifmgd->probe_timeout)) + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + max_tries = IEEE80211_MAX_NULLFUNC_TRIES; + else + max_tries = IEEE80211_MAX_PROBE_TRIES; + + /* ACK received for nullfunc probing frame */ + if (!ifmgd->probe_send_count) + ieee80211_reset_ap_probe(sdata); + + else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); - else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) { + else if (ifmgd->probe_send_count < max_tries) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" @@ -1988,6 +2067,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) add_timer(&ifmgd->timer); if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) add_timer(&ifmgd->chswitch_timer); + ieee80211_sta_reset_beacon_monitor(sdata); + ieee80211_restart_sta_timer(sdata); } #endif diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 33f76993da08..3d5a2cb835c4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -211,7 +211,8 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); } -static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, + struct ieee80211_supported_band *sband) { u8 i; @@ -222,7 +223,7 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) if (basic_rates & (1 << *idx)) return; /* selected rate is a basic rate */ - for (i = *idx + 1; i <= max_rate_idx; i++) { + for (i = *idx + 1; i <= sband->n_bitrates; i++) { if (basic_rates & (1 << i)) { *idx = i; return; @@ -237,16 +238,25 @@ bool rate_control_send_low(struct ieee80211_sta *sta, struct ieee80211_tx_rate_control *txrc) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_supported_band *sband = txrc->sband; + int mcast_rate; if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; - if (!sta && txrc->ap) + if (!sta && txrc->bss) { + mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; + if (mcast_rate > 0) { + info->control.rates[0].idx = mcast_rate - 1; + return true; + } + rc_send_low_broadcast(&info->control.rates[0].idx, txrc->bss_conf->basic_rates, - txrc->sband->n_bitrates); + sband); + } return true; } return false; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 2a18d6602d4a..4ad7a362fcc1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -371,6 +371,9 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru if (likely(sta->ampdu_mlme.tid_tx[tid])) return; + if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO) + return; + ieee80211_start_tx_ba_session(pubsta, tid); } @@ -407,8 +410,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->ampdu_len += info->status.ampdu_len; if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { - mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); - mi->sample_tries = 3; + mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 2; mi->sample_count--; } @@ -506,7 +509,9 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (!mr->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mr->probability < MINSTREL_FRAC(20, 100)) + if (sample) + rate->count = 1; + else if (mr->probability < MINSTREL_FRAC(20, 100)) rate->count = 2; else if (rtscts) rate->count = mr->retry_count_rtscts; @@ -562,7 +567,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) */ if (minstrel_get_duration(sample_idx) > minstrel_get_duration(mi->max_tp_rate)) { - if (mr->sample_skipped < 10) + if (mr->sample_skipped < 20) goto next; if (mi->sample_slow++ > 2) @@ -586,6 +591,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_priv *mp = priv; int sample_idx; + bool sample = false; if (rate_control_send_low(sta, priv_sta, txrc)) return; @@ -596,10 +602,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, info->flags |= mi->tx_flags; sample_idx = minstrel_get_sample_rate(mp, mi); if (sample_idx >= 0) { + sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, txrc, true, false); minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - txrc, false, true); + txrc, false, false); info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, @@ -607,7 +614,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, txrc, false, true); } - minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample); ar[3].count = 0; ar[3].idx = -1; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54fb4a0e76f0..55337709de41 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1102,8 +1102,6 @@ static void ap_sta_ps_end(struct sta_info *sta) atomic_dec(&sdata->bss->num_sta_ps); - clear_sta_flags(sta, WLAN_STA_PS_STA); - #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6d8f897d8763..eff58571fd7e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -199,8 +199,11 @@ static void sta_unblock(struct work_struct *wk) if (!test_sta_flags(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); - else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) + else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) { + clear_sta_flags(sta, WLAN_STA_PS_DRIVER); ieee80211_sta_ps_deliver_poll_response(sta); + } else + clear_sta_flags(sta, WLAN_STA_PS_DRIVER); } static int sta_prepare_rate_control(struct ieee80211_local *local, @@ -880,6 +883,13 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_find_sta); +static void clear_sta_ps_flags(void *_sta) +{ + struct sta_info *sta = _sta; + + clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA); +} + /* powersave support code */ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { @@ -894,7 +904,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) /* Send all buffered frames to the station */ sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); + buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf, + clear_sta_ps_flags, sta); sent += buffered; local->total_ps_buffered -= buffered; @@ -973,7 +984,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, if (block) set_sta_flags(sta, WLAN_STA_PS_DRIVER); - else + else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) ieee80211_queue_work(hw, &sta->drv_unblock_wk); } EXPORT_SYMBOL(ieee80211_sta_block_awake); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9265acadef32..b562d9b6a702 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -248,6 +248,7 @@ enum plink_state { * @sta: station information we share with the driver * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver + * @lost_packets: number of consecutive lost packets */ struct sta_info { /* General information, mostly static */ @@ -335,6 +336,8 @@ struct sta_info { } debugfs; #endif + unsigned int lost_packets; + /* keep last! */ struct ieee80211_sta sta; }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 3153c19893b8..bed7e32ed908 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -155,8 +155,21 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) ieee80211_queue_work(&local->hw, &local->recalc_smps); } + + if ((sdata->vif.type == NL80211_IFTYPE_STATION) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + ieee80211_sta_tx_notify(sdata, (void *) skb->data); } +/* + * Use a static threshold for now, best value to be determined + * by testing ... + * Should it depend on: + * - on # of retransmissions + * - current throughput (higher value for higher tpt)? + */ +#define STA_LOST_PKT_THRESHOLD 50 + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -243,6 +256,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && (info->flags & IEEE80211_TX_STAT_ACK)) ieee80211_frame_acked(sta, skb); + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + if (info->flags & IEEE80211_TX_STAT_ACK) { + if (sta->lost_packets) + sta->lost_packets = 0; + } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) { + cfg80211_cqm_pktloss_notify(sta->sdata->dev, + sta->sta.addr, + sta->lost_packets, + GFP_ATOMIC); + sta->lost_packets = 0; + } + } } rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index df6aac523532..2ba742656825 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -622,7 +622,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || + tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -1033,6 +1034,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; + bool hw_frag; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, NULL); @@ -1042,6 +1044,9 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; tx->flags &= ~IEEE80211_TX_FRAGMENTED; + /* packet is fragmented in HW if we have a non-NULL driver callback */ + hw_frag = (tx->local->ops->set_frag_threshold != NULL); + /* * for every radiotap entry that is present * (ieee80211_radiotap_iterator_next returns -ENOENT when no more @@ -1078,7 +1083,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) && + !hw_frag) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1181,8 +1187,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. + * Only valid when fragmentation is done by the stack. */ - tx->flags |= IEEE80211_TX_FRAGMENTED; + if (!local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { @@ -2306,7 +2314,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = true; + txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0b6fc92bc0d7..e497476174ce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -368,8 +368,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) +int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; @@ -394,6 +395,9 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, __skb_queue_tail(&local->pending[queue], skb); } + if (fn) + fn(data); + for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); @@ -402,6 +406,12 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, return ret; } +int ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) +{ + return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); +} + void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason) { @@ -1011,9 +1021,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, return pos - buffer; } -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len) +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1027,7 +1038,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, if (!buf) { printk(KERN_DEBUG "%s: failed to allocate temporary IE " "buffer\n", sdata->name); - return; + return NULL; } chan = ieee80211_frequency_to_channel( @@ -1050,8 +1061,20 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); kfree(buf); + + return skb; +} + +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct sk_buff *skb; + + skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len); + if (skb) + ieee80211_tx_skb(sdata, skb); } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -1152,6 +1175,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); + /* setup fragmentation threshold */ + drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 34e6d02da779..58e75bbc1f91 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -21,7 +21,16 @@ /* Default mapping in classifier to work with default * queue setup. */ -const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; +const int ieee802_1d_to_ac[8] = { + IEEE80211_AC_BE, + IEEE80211_AC_BK, + IEEE80211_AC_BK, + IEEE80211_AC_BE, + IEEE80211_AC_VI, + IEEE80211_AC_VI, + IEEE80211_AC_VO, + IEEE80211_AC_VO +}; static int wme_downgrade_ac(struct sk_buff *skb) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 85dabb86be6f..32fcbe290c04 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -173,9 +173,11 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - } else if (verdict == NF_DROP) { + } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { kfree_skb(skb); - ret = -EPERM; + ret = -(verdict >> NF_VERDICT_BITS); + if (ret == 0) + ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5f5daa30b0af..c6f293639220 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -110,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) struct rt6_info *rt; struct flowi fl = { .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *addr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + .fl6_dst = *addr, + .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index de04ea39cde8..5325a3fbe4ac 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = dest->addr.ip, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = dest->addr.ip, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = daddr, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -169,7 +161,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb) struct net *net = dev_net(dev); struct iphdr *iph = ip_hdr(skb); - if (rt->fl.iif) { + if (rt_is_input_route(rt)) { unsigned long orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, @@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb) refdst_drop(orefdst); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos), - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .mark = skb->mark, }; struct rtable *rt; @@ -216,12 +203,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, { struct dst_entry *dst; struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; dst = ip6_route_output(net, NULL, &fl); @@ -552,7 +534,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { + if (local && ipv4_is_loopback(rt->rt_dst) && + rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); goto tx_error_put; @@ -1165,7 +1148,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { + if (local && ipv4_is_loopback(rt->rt_dst) && + rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 22a2d421e7eb..5128a6c4cb2c 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; fl.oif = info->priv->oif; } - fl.nl_u.ip4_u.daddr = info->gw.ip; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + fl.fl4_dst = info->gw.ip; + fl.fl4_tos = RT_TOS(iph->tos); + fl.fl4_scope = RT_SCOPE_UNIVERSE; if (ip_route_output_key(net, &rt, &fl) != 0) return false; @@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; fl.oif = info->priv->oif; } - fl.nl_u.ip6_u.daddr = info->gw.in6; - fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + fl.fl6_dst = info->gw.in6; + fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return false; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8298e676f5a0..246a04a13234 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -61,6 +61,7 @@ #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -163,8 +164,13 @@ struct packet_mreq_max { static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring); +#define PGV_FROM_VMALLOC 1 +struct pgv { + char *buffer; +}; + struct packet_ring_buffer { - char **pg_vec; + struct pgv *pg_vec; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; @@ -217,6 +223,13 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +static inline __pure struct page *pgv_to_page(void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + return virt_to_page(addr); +} + static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union { @@ -229,11 +242,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) switch (po->tp_version) { case TPACKET_V1: h.h1->tp_status = status; - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: h.h2->tp_status = status; - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; default: pr_err("TPACKET version not supported\n"); @@ -256,10 +269,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame) h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - flush_dcache_page(virt_to_page(&h.h1->tp_status)); + flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return h.h1->tp_status; case TPACKET_V2: - flush_dcache_page(virt_to_page(&h.h2->tp_status)); + flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; default: pr_err("TPACKET version not supported\n"); @@ -283,7 +296,8 @@ static void *packet_lookup_frame(struct packet_sock *po, pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; - h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size); + h.raw = rb->pg_vec[pg_vec_pos].buffer + + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; @@ -503,7 +517,8 @@ out_free: return err; } -static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, +static inline unsigned int run_filter(const struct sk_buff *skb, + const struct sock *sk, unsigned int res) { struct sk_filter *filter; @@ -511,22 +526,22 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, rcu_read_lock_bh(); filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) - res = sk_run_filter(skb, filter->insns, filter->len); + res = sk_run_filter(skb, filter->insns); rcu_read_unlock_bh(); return res; } /* - This function makes lazy skb cloning in hope that most of packets - are discarded by BPF. - - Note tricky part: we DO mangle shared skb! skb->data, skb->len - and skb->cb are mangled. It works because (and until) packets - falling here are owned by current CPU. Output packets are cloned - by dev_queue_xmit_nit(), input packets are processed by net_bh - sequencially, so that if we return skb to original state on exit, - we will not harm anyone. + * This function makes lazy skb cloning in hope that most of packets + * are discarded by BPF. + * + * Note tricky part: we DO mangle shared skb! skb->data, skb->len + * and skb->cb are mangled. It works because (and until) packets + * falling here are owned by current CPU. Output packets are cloned + * by dev_queue_xmit_nit(), input packets are processed by net_bh + * sequencially, so that if we return skb to original state on exit, + * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, @@ -552,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, if (dev->header_ops) { /* The device has an explicit notion of ll header, - exported to higher levels. - - Otherwise, the device hides datails of it frame - structure, so that corresponding packet head - never delivered to user. + * exported to higher levels. + * + * Otherwise, the device hides details of its frame + * structure, so that corresponding packet head is + * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); @@ -791,17 +806,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __packet_set_status(po, h.raw, status); smp_mb(); +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 { - struct page *p_start, *p_end; - u8 *h_end = h.raw + macoff + snaplen - 1; - - p_start = virt_to_page(h.raw); - p_end = virt_to_page(h_end); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } + u8 *start, *end; + + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } +#endif sk->sk_data_ready(sk, 0); @@ -907,7 +920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, } err = -EFAULT; - page = virt_to_page(data); offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); @@ -926,11 +938,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return -EFAULT; } + page = pgv_to_page(data); + data += len; flush_dcache_page(page); get_page(page); - skb_fill_page_desc(skb, - nr_frags, - page++, offset, len); + skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; @@ -2325,37 +2337,70 @@ static const struct vm_operations_struct packet_mmap_ops = { .close = packet_mm_close, }; -static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) +static void free_pg_vec(struct pgv *pg_vec, unsigned int order, + unsigned int len) { int i; for (i = 0; i < len; i++) { - if (likely(pg_vec[i])) - free_pages((unsigned long) pg_vec[i], order); + if (likely(pg_vec[i].buffer)) { + if (is_vmalloc_addr(pg_vec[i].buffer)) + vfree(pg_vec[i].buffer); + else + free_pages((unsigned long)pg_vec[i].buffer, + order); + pg_vec[i].buffer = NULL; + } } kfree(pg_vec); } static inline char *alloc_one_pg_vec_page(unsigned long order) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + char *buffer = NULL; + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | + __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; - return (char *) __get_free_pages(gfp_flags, order); + buffer = (char *) __get_free_pages(gfp_flags, order); + + if (buffer) + return buffer; + + /* + * __get_free_pages failed, fall back to vmalloc + */ + buffer = vzalloc((1 << order) * PAGE_SIZE); + + if (buffer) + return buffer; + + /* + * vmalloc failed, lets dig into swap here + */ + gfp_flags &= ~__GFP_NORETRY; + buffer = (char *)__get_free_pages(gfp_flags, order); + if (buffer) + return buffer; + + /* + * complete and utter failure + */ + return NULL; } -static char **alloc_pg_vec(struct tpacket_req *req, int order) +static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; - char **pg_vec; + struct pgv *pg_vec; int i; - pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); if (unlikely(!pg_vec)) goto out; for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (unlikely(!pg_vec[i])) + pg_vec[i].buffer = alloc_one_pg_vec_page(order); + if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } @@ -2371,7 +2416,7 @@ out_free_pgvec: static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring) { - char **pg_vec = NULL; + struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); int was_running, order = 0; struct packet_ring_buffer *rb; @@ -2533,15 +2578,17 @@ static int packet_mmap(struct file *file, struct socket *sock, continue; for (i = 0; i < rb->pg_vec_len; i++) { - struct page *page = virt_to_page(rb->pg_vec[i]); + struct page *page; + void *kaddr = rb->pg_vec[i].buffer; int pg_num; - for (pg_num = 0; pg_num < rb->pg_vec_pages; - pg_num++, page++) { + for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { + page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; + kaddr += PAGE_SIZE; } } } diff --git a/net/phonet/Makefile b/net/phonet/Makefile index d62bbba649b3..e10b1b182ce3 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_PHONET) += phonet.o pn_pep.o -phonet-objs := \ +phonet-y := \ pn_dev.o \ pn_netlink.o \ socket.o \ @@ -8,4 +8,4 @@ phonet-objs := \ sysctl.o \ af_phonet.o -pn_pep-objs := pep.o pep-gprs.o +pn_pep-y := pep.o pep-gprs.o diff --git a/net/rds/Makefile b/net/rds/Makefile index b46eca109688..56d3f6023ced 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -4,7 +4,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ loop.o page.o rdma.o obj-$(CONFIG_RDS_RDMA) += rds_rdma.o -rds_rdma-objs := rdma_transport.o \ +rds_rdma-y := rdma_transport.o \ ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ ib_sysctl.o ib_rdma.o \ iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \ @@ -12,10 +12,8 @@ rds_rdma-objs := rdma_transport.o \ obj-$(CONFIG_RDS_TCP) += rds_tcp.o -rds_tcp-objs := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ +rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o -ifeq ($(CONFIG_RDS_DEBUG), y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 04f599089e6d..0198191b756d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) rfkill_led_trigger_event(rfkill); } -const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) -{ - return rfkill->led_trigger.name; -} -EXPORT_SYMBOL(rfkill_get_led_trigger_name); - -void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) -{ - BUG_ON(!rfkill); - - rfkill->ledtrigname = name; -} -EXPORT_SYMBOL(rfkill_set_led_trigger_name); - static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index c46867c61c98..d1c3429b69ed 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux kernel RxRPC # -af-rxrpc-objs := \ +af-rxrpc-y := \ af_rxrpc.o \ ar-accept.o \ ar-ack.o \ @@ -21,7 +21,7 @@ af-rxrpc-objs := \ ar-transport.o ifeq ($(CONFIG_PROC_FS),y) -af-rxrpc-objs += ar-proc.o +af-rxrpc-y += ar-proc.o endif obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 9f1729bd60de..a53fb25a64ed 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) case AF_INET: fl.oif = 0; fl.proto = IPPROTO_UDP, - fl.nl_u.ip4_u.saddr = 0; - fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; - fl.nl_u.ip4_u.tos = 0; + fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; + fl.fl4_src = 0; + fl.fl4_tos = 0; /* assume AFS.CM talking to AFS.FS */ - fl.uli_u.ports.sport = htons(7001); - fl.uli_u.ports.dport = htons(7000); + fl.fl_ip_sport = htons(7001); + fl.fl_ip_dport = htons(7000); break; default: BUG(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5dbb3cd96e59..0918834ee4a1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) { + if (!netif_tx_queue_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else @@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_frozen_or_stopped(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, ret = dev_requeue_skb(skb, q); } - if (ret && (netif_tx_queue_stopped(txq) || - netif_tx_queue_frozen(txq))) + if (ret && netif_tx_queue_frozen_or_stopped(txq)) ret = 0; return ret; @@ -555,7 +553,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); - p = kzalloc(size, GFP_KERNEL); + p = kzalloc_node(size, GFP_KERNEL, + netdev_queue_numa_node_read(dev_queue)); + if (!p) goto errout; sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 401af9596709..106479a7c94a 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -309,8 +309,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); - if (!netif_tx_queue_stopped(slave_txq) && - !netif_tx_queue_frozen(slave_txq) && + if (!netif_tx_queue_frozen_or_stopped(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6bd554323a34..842c7f3650b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6047,7 +6047,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * will suddenly eat the receive_queue. * * Look at current nfs client by the way... - * However, this function was corrent in any case. 8) + * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { spin_lock_bh(&sk->sk_receive_queue.lock); diff --git a/net/socket.c b/net/socket.c index 3ca2fd9e3720..c898df76e924 100644 --- a/net/socket.c +++ b/net/socket.c @@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -1200,7 +1200,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) + if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family); #endif @@ -2332,10 +2332,11 @@ int sock_register(const struct net_proto_family *ops) } spin_lock(&net_family_lock); - if (net_families[ops->family]) + if (rcu_dereference_protected(net_families[ops->family], + lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - net_families[ops->family] = ops; + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2363,7 +2364,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - net_families[family] = NULL; + rcu_assign_pointer(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 7350d86a32ee..9e4cb59ef9f0 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -4,10 +4,10 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o -auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ +auth_rpcgss-y := auth_gss.o gss_generic_token.o \ gss_mech_switch.o svcauth_gss.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o -rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ +rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 8a2e89bffde5..886715a75259 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -35,11 +35,9 @@ */ #include "core.h" -#include "dbg.h" #include "addr.h" #include "zone.h" #include "cluster.h" -#include "net.h" /** * tipc_addr_domain_valid - validates a network domain address diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 22a60fc98392..6d828d9eda42 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -36,17 +36,9 @@ */ #include "core.h" -#include "msg.h" -#include "dbg.h" #include "link.h" -#include "net.h" -#include "node.h" #include "port.h" -#include "addr.h" -#include "node_subscr.h" #include "name_distr.h" -#include "bearer.h" -#include "name_table.h" #include "bcast.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9927d1d56c4f..885da94be4ac 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -36,12 +36,9 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "bearer.h" -#include "link.h" #include "port.h" #include "discover.h" -#include "bcast.h" #define MAX_ADDR_STR 32 @@ -625,7 +622,7 @@ int tipc_block_bearer(const char *name) * Note: This routine assumes caller holds tipc_net_lock. */ -static int bearer_disable(struct bearer *b_ptr) +static void bearer_disable(struct bearer *b_ptr) { struct link *l_ptr; struct link *temp_l_ptr; @@ -641,7 +638,6 @@ static int bearer_disable(struct bearer *b_ptr) } spin_unlock_bh(&b_ptr->publ.lock); memset(b_ptr, 0, sizeof(struct bearer)); - return 0; } int tipc_disable_bearer(const char *name) @@ -654,8 +650,10 @@ int tipc_disable_bearer(const char *name) if (b_ptr == NULL) { warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; - } else - res = bearer_disable(b_ptr); + } else { + bearer_disable(b_ptr); + res = 0; + } write_unlock_bh(&tipc_net_lock); return res; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a850b389663e..85f451d5aacf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,50 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "core.h" #include "bcast.h" #define MAX_BEARERS 8 #define MAX_MEDIA 4 +/* + * Identifiers of supported TIPC media types + */ +#define TIPC_MEDIA_TYPE_ETH 1 + +/* + * Destination address structure used by TIPC bearers when sending messages + * + * IMPORTANT: The fields of this structure MUST be stored using the specified + * byte order indicated below, as the structure is exchanged between nodes + * as part of a link setup process. + */ +struct tipc_media_addr { + __be32 type; /* bearer type (network byte order) */ + union { + __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */ + } dev_addr; +}; + +/** + * struct tipc_bearer - TIPC bearer info available to media code + * @usr_handle: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @blocked: non-zero if bearer is blocked + * @lock: spinlock for controlling access to bearer + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) + * + * Note: TIPC initializes "name" and "lock" fields; media code is responsible + * for initialization all other fields when a bearer is enabled. + */ +struct tipc_bearer { + void *usr_handle; + u32 mtu; + int blocked; + spinlock_t lock; + struct tipc_media_addr addr; + char name[TIPC_MAX_BEARER_NAME]; +}; /** * struct media - TIPC media information available to internal users @@ -55,7 +93,7 @@ * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion - * @type_id: TIPC media identifier [defined in tipc_bearer.h] + * @type_id: TIPC media identifier * @name: media name */ @@ -116,6 +154,34 @@ struct link; extern struct bearer tipc_bearers[]; +/* + * TIPC routines available to supported media types + */ +int tipc_register_media(u32 media_type, + char *media_name, int (*enable)(struct tipc_bearer *), + void (*disable)(struct tipc_bearer *), + int (*send_msg)(struct sk_buff *, + struct tipc_bearer *, struct tipc_media_addr *), + char *(*addr2str)(struct tipc_media_addr *a, + char *str_buf, int str_size), + struct tipc_media_addr *bcast_addr, const u32 bearer_priority, + const u32 link_tolerance, /* [ms] */ + const u32 send_window_limit); + +void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); + +int tipc_block_bearer(const char *name); +void tipc_continue(struct tipc_bearer *tb_ptr); + +int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_disable_bearer(const char *name); + +/* + * Routines made available to TIPC by supported media types + */ +int tipc_eth_media_start(void); +void tipc_eth_media_stop(void); + void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); @@ -126,7 +192,6 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); struct bearer *tipc_bearer_find_interface(const char *if_name); int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); -int tipc_bearer_init(void); void tipc_bearer_stop(void); void tipc_bearer_lock_push(struct bearer *b_ptr); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 7fea14b98b97..405be87157ba 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -36,17 +36,10 @@ #include "core.h" #include "cluster.h" -#include "addr.h" -#include "node_subscr.h" #include "link.h" -#include "node.h" -#include "net.h" -#include "msg.h" -#include "bearer.h" static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, u32 lower, u32 upper); -static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest); struct tipc_node **tipc_local_nodes = NULL; struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}}; diff --git a/net/tipc/config.c b/net/tipc/config.c index 50a6133a3668..bdde39f0436b 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -35,23 +35,11 @@ */ #include "core.h" -#include "dbg.h" -#include "bearer.h" #include "port.h" #include "link.h" -#include "zone.h" -#include "addr.h" #include "name_table.h" -#include "node.h" +#include "user_reg.h" #include "config.h" -#include "discover.h" - -struct subscr_data { - char usr_handle[8]; - u32 domain; - u32 port_ref; - struct list_head subd_list; -}; struct manager { u32 user_ref; @@ -572,7 +560,7 @@ int tipc_cfg_init(void) struct tipc_name_seq seq; int res; - res = tipc_attach(&mng.user_ref, NULL, NULL); + res = tipc_attach(&mng.user_ref); if (res) goto failed; diff --git a/net/tipc/config.h b/net/tipc/config.h index 481e12ece715..443159a166fd 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -39,7 +39,6 @@ /* ---------------------------------------------------------------------- */ -#include "core.h" #include "link.h" struct sk_buff *tipc_cfg_reply_alloc(int payload_size); diff --git a/net/tipc/core.c b/net/tipc/core.c index e2a09eb8efd4..f5d62c174de2 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -40,7 +40,6 @@ #include <linux/random.h> #include "core.h" -#include "dbg.h" #include "ref.h" #include "net.h" #include "user_reg.h" @@ -236,43 +235,3 @@ module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(TIPC_MOD_VER); - -/* Native TIPC API for kernel-space applications (see tipc.h) */ - -EXPORT_SYMBOL(tipc_attach); -EXPORT_SYMBOL(tipc_detach); -EXPORT_SYMBOL(tipc_createport); -EXPORT_SYMBOL(tipc_deleteport); -EXPORT_SYMBOL(tipc_ownidentity); -EXPORT_SYMBOL(tipc_portimportance); -EXPORT_SYMBOL(tipc_set_portimportance); -EXPORT_SYMBOL(tipc_portunreliable); -EXPORT_SYMBOL(tipc_set_portunreliable); -EXPORT_SYMBOL(tipc_portunreturnable); -EXPORT_SYMBOL(tipc_set_portunreturnable); -EXPORT_SYMBOL(tipc_publish); -EXPORT_SYMBOL(tipc_withdraw); -EXPORT_SYMBOL(tipc_connect2port); -EXPORT_SYMBOL(tipc_disconnect); -EXPORT_SYMBOL(tipc_shutdown); -EXPORT_SYMBOL(tipc_send); -EXPORT_SYMBOL(tipc_send2name); -EXPORT_SYMBOL(tipc_send2port); -EXPORT_SYMBOL(tipc_multicast); - -/* TIPC API for external bearers (see tipc_bearer.h) */ - -EXPORT_SYMBOL(tipc_block_bearer); -EXPORT_SYMBOL(tipc_continue); -EXPORT_SYMBOL(tipc_disable_bearer); -EXPORT_SYMBOL(tipc_enable_bearer); -EXPORT_SYMBOL(tipc_recv_msg); -EXPORT_SYMBOL(tipc_register_media); - -/* TIPC API for external APIs (see tipc_port.h) */ - -EXPORT_SYMBOL(tipc_createport_raw); -EXPORT_SYMBOL(tipc_reject_msg); -EXPORT_SYMBOL(tipc_send_buf_fast); -EXPORT_SYMBOL(tipc_acknowledge); - diff --git a/net/tipc/core.h b/net/tipc/core.h index e19389e57227..ca7e171c1043 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -39,10 +39,6 @@ #include <linux/tipc.h> #include <linux/tipc_config.h> -#include <net/tipc/tipc_msg.h> -#include <net/tipc/tipc_port.h> -#include <net/tipc/tipc_bearer.h> -#include <net/tipc/tipc.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -62,6 +58,9 @@ #define TIPC_MOD_VER "2.0.0" +struct tipc_msg; /* msg.h */ +struct print_buf; /* dbg.h */ + /* * TIPC sanity test macros */ @@ -174,6 +173,13 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ /* + * TIPC operating mode routines + */ +#define TIPC_NOT_RUNNING 0 +#define TIPC_NODE_MODE 1 +#define TIPC_NET_MODE 2 + +/* * Global configuration variables */ diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 4a7cd3719b78..f2ce36baf42e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -35,9 +35,7 @@ */ #include "core.h" -#include "dbg.h" #include "link.h" -#include "zone.h" #include "discover.h" #include "port.h" #include "name_table.h" diff --git a/net/tipc/discover.h b/net/tipc/discover.h index f8e750636123..d2c3cffb79fc 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -37,8 +37,6 @@ #ifndef _TIPC_DISCOVER_H #define _TIPC_DISCOVER_H -#include "core.h" - struct link_req; struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 6e988ba485fd..ee683cc8f4b1 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -34,13 +34,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <net/tipc/tipc.h> -#include <net/tipc/tipc_bearer.h> -#include <net/tipc/tipc_msg.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/net_namespace.h> +#include "core.h" +#include "bearer.h" + #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI #define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL diff --git a/net/tipc/link.c b/net/tipc/link.c index b31992ccd5d3..cf414cf05e72 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -35,19 +35,11 @@ */ #include "core.h" -#include "dbg.h" #include "link.h" -#include "net.h" -#include "node.h" #include "port.h" -#include "addr.h" -#include "node_subscr.h" #include "name_distr.h" -#include "bearer.h" -#include "name_table.h" #include "discover.h" #include "config.h" -#include "bcast.h" /* @@ -57,12 +49,6 @@ #define INVALID_SESSION 0x10000 /* - * Limit for deferred reception queue: - */ - -#define DEF_QUEUE_LIMIT 256u - -/* * Link state events: */ diff --git a/net/tipc/link.h b/net/tipc/link.h index f98bc613de67..c562888d25da 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -39,7 +39,6 @@ #include "dbg.h" #include "msg.h" -#include "bearer.h" #include "node.h" #define PUSH_FAILED 1 diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ecb532fb0351..ee6b4c68d4a4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -36,9 +36,7 @@ #include "core.h" #include "addr.h" -#include "dbg.h" #include "msg.h" -#include "bearer.h" u32 tipc_msg_tot_importance(struct tipc_msg *m) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 031aad18efce..aee53864d7a0 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,10 +37,51 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "core.h" +#include "bearer.h" #define TIPC_VERSION 2 +/* + * TIPC user data message header format, version 2: + * + * + * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w0:|vers | user |hdr sz |n|d|s|-| message size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w2:| link level ack no | broadcast/link level seq no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w3:| previous node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w4:| originating port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w5:| destination port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w6:| originating node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w7:| destination node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w8:| name type / transport sequence number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w9:| name instance/multicast lower bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * wA:| multicast upper bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ options \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 + + #define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ #define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ #define LONG_H_SIZE 40 /* Named messages */ @@ -52,20 +93,26 @@ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -/* - TIPC user data message header format, version 2 +struct tipc_msg { + __be32 hdr[15]; +}; - - Fundamental definitions available to privileged TIPC users - are located in tipc_msg.h. - - Remaining definitions available to TIPC internal users appear below. -*/ +static inline u32 msg_word(struct tipc_msg *m, u32 pos) +{ + return ntohl(m->hdr[pos]); +} static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) { m->hdr[w] = htonl(val); } +static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) +{ + return (msg_word(m, w) >> pos) & mask; +} + static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { @@ -112,16 +159,36 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } +static inline u32 msg_importance(struct tipc_msg *m) +{ + return msg_bits(m, 0, 25, 0xf); +} + static inline void msg_set_importance(struct tipc_msg *m, u32 i) { msg_set_user(m, i); } +static inline u32 msg_hdr_sz(struct tipc_msg *m) +{ + return msg_bits(m, 0, 21, 0xf) << 2; +} + static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n) { msg_set_bits(m, 0, 21, 0xf, n>>2); } +static inline u32 msg_size(struct tipc_msg *m) +{ + return msg_bits(m, 0, 0, 0x1ffff); +} + +static inline u32 msg_data_sz(struct tipc_msg *m) +{ + return msg_size(m) - msg_hdr_sz(m); +} + static inline int msg_non_seq(struct tipc_msg *m) { return msg_bits(m, 0, 20, 1); @@ -162,11 +229,36 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) * Word 1 */ +static inline u32 msg_type(struct tipc_msg *m) +{ + return msg_bits(m, 1, 29, 0x7); +} + static inline void msg_set_type(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 29, 0x7, n); } +static inline u32 msg_named(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_NAMED_MSG; +} + +static inline u32 msg_mcast(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_MCAST_MSG; +} + +static inline u32 msg_connected(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_CONN_MSG; +} + +static inline u32 msg_errcode(struct tipc_msg *m) +{ + return msg_bits(m, 1, 25, 0xf); +} + static inline void msg_set_errcode(struct tipc_msg *m, u32 err) { msg_set_bits(m, 1, 25, 0xf, err); @@ -257,31 +349,68 @@ static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) */ +static inline u32 msg_prevnode(struct tipc_msg *m) +{ + return msg_word(m, 3); +} + static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 3, a); } +static inline u32 msg_origport(struct tipc_msg *m) +{ + return msg_word(m, 4); +} + static inline void msg_set_origport(struct tipc_msg *m, u32 p) { msg_set_word(m, 4, p); } +static inline u32 msg_destport(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_destport(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline u32 msg_mc_netid(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline int msg_short(struct tipc_msg *m) +{ + return msg_hdr_sz(m) == 24; +} + +static inline u32 msg_orignode(struct tipc_msg *m) +{ + if (likely(msg_short(m))) + return msg_prevnode(m); + return msg_word(m, 6); +} + static inline void msg_set_orignode(struct tipc_msg *m, u32 a) { msg_set_word(m, 6, a); } +static inline u32 msg_destnode(struct tipc_msg *m) +{ + return msg_word(m, 7); +} + static inline void msg_set_destnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 7, a); @@ -299,6 +428,11 @@ static inline u32 msg_routed(struct tipc_msg *m) return(msg_destnode(m) ^ msg_orignode(m)) >> 11; } +static inline u32 msg_nametype(struct tipc_msg *m) +{ + return msg_word(m, 8); +} + static inline void msg_set_nametype(struct tipc_msg *m, u32 n) { msg_set_word(m, 8, n); @@ -324,6 +458,16 @@ static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } +static inline u32 msg_nameinst(struct tipc_msg *m) +{ + return msg_word(m, 9); +} + +static inline u32 msg_namelower(struct tipc_msg *m) +{ + return msg_nameinst(m); +} + static inline void msg_set_namelower(struct tipc_msg *m, u32 n) { msg_set_word(m, 9, n); @@ -334,11 +478,21 @@ static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) msg_set_namelower(m, n); } +static inline u32 msg_nameupper(struct tipc_msg *m) +{ + return msg_word(m, 10); +} + static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) { msg_set_word(m, 10, n); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) { return (struct tipc_msg *)msg_data(m); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 7b907171f879..10ff48be3c01 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -36,9 +36,7 @@ #include "core.h" #include "cluster.h" -#include "dbg.h" #include "link.h" -#include "msg.h" #include "name_distr.h" #define ITEM_SIZE sizeof(struct distr_item) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 3a8de4334da1..d5adb0456746 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -36,15 +36,10 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "name_table.h" #include "name_distr.h" -#include "addr.h" -#include "node_subscr.h" #include "subscr.h" #include "port.h" -#include "cluster.h" -#include "bcast.h" static int tipc_nametbl_size = 1024; /* must be a power of 2 */ diff --git a/net/tipc/net.c b/net/tipc/net.c index 1a621cfd6604..c2b4b86c2e6a 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -35,18 +35,13 @@ */ #include "core.h" -#include "bearer.h" #include "net.h" #include "zone.h" -#include "addr.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" #include "link.h" -#include "msg.h" #include "port.h" -#include "bcast.h" -#include "discover.h" #include "config.h" /* diff --git a/net/tipc/node.c b/net/tipc/node.c index b4d87eb2dc5d..df71dfc3a9ae 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -37,16 +37,9 @@ #include "core.h" #include "config.h" #include "node.h" -#include "cluster.h" -#include "net.h" -#include "addr.h" -#include "node_subscr.h" -#include "link.h" #include "port.h" -#include "bearer.h" #include "name_distr.h" -void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str); static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 19194d476a9e..018a55332d91 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -35,10 +35,8 @@ */ #include "core.h" -#include "dbg.h" #include "node_subscr.h" #include "node.h" -#include "addr.h" /** * tipc_nodesub_subscribe - create "node down" subscription for specified node diff --git a/net/tipc/port.c b/net/tipc/port.c index 82092eaa1536..7873283f4965 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -36,15 +36,9 @@ #include "core.h" #include "config.h" -#include "dbg.h" #include "port.h" -#include "addr.h" -#include "link.h" -#include "node.h" #include "name_table.h" #include "user_reg.h" -#include "msg.h" -#include "bcast.h" /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ @@ -94,7 +88,7 @@ static void port_incr_out_seqno(struct port *p_ptr) * tipc_multicast - send a multicast message to local and remote destinations */ -int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, +int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 num_sect, struct iovec const *msg_sect) { struct tipc_msg *hdr; @@ -989,13 +983,6 @@ int tipc_createport(u32 user_ref, return 0; } -int tipc_ownidentity(u32 ref, struct tipc_portid *id) -{ - id->ref = ref; - id->node = tipc_own_addr; - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct port *p_ptr; @@ -1271,16 +1258,11 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) } /** - * tipc_forward2name - forward message sections to port name + * tipc_send2name - send message sections to port name */ -static int tipc_forward2name(u32 ref, - struct tipc_name const *name, - u32 domain, - u32 num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, + unsigned int num_sect, struct iovec const *msg_sect) { struct port *p_ptr; struct tipc_msg *msg; @@ -1294,14 +1276,12 @@ static int tipc_forward2name(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_hdr_sz(msg, LONG_H_SIZE); msg_set_nametype(msg, name->type); msg_set_nameinst(msg, name->instance); msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg,importance); destport = tipc_nametbl_translate(name->type, name->instance, &destnode); msg_set_destnode(msg, destnode); msg_set_destport(msg, destport); @@ -1325,33 +1305,11 @@ static int tipc_forward2name(u32 ref, } /** - * tipc_send2name - send message sections to port name - */ - -int tipc_send2name(u32 ref, - struct tipc_name const *name, - unsigned int domain, - unsigned int num_sect, - struct iovec const *msg_sect) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward2name(ref, name, domain, num_sect, msg_sect, &orig, - TIPC_PORT_IMPORTANCE); -} - -/** - * tipc_forward2port - forward message sections to port identity + * tipc_send2port - send message sections to port identity */ -static int tipc_forward2port(u32 ref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send2port(u32 ref, struct tipc_portid const *dest, + unsigned int num_sect, struct iovec const *msg_sect) { struct port *p_ptr; struct tipc_msg *msg; @@ -1363,13 +1321,11 @@ static int tipc_forward2port(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, importance); p_ptr->sent++; if (dest->node == tipc_own_addr) return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); @@ -1384,31 +1340,11 @@ static int tipc_forward2port(u32 ref, } /** - * tipc_send2port - send message sections to port identity + * tipc_send_buf2port - send message buffer to port identity */ -int tipc_send2port(u32 ref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward2port(ref, dest, num_sect, msg_sect, &orig, - TIPC_PORT_IMPORTANCE); -} - -/** - * tipc_forward_buf2port - forward message buffer to port identity - */ -static int tipc_forward_buf2port(u32 ref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance) +int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, + struct sk_buff *buf, unsigned int dsz) { struct port *p_ptr; struct tipc_msg *msg; @@ -1420,13 +1356,11 @@ static int tipc_forward_buf2port(u32 ref, msg = &p_ptr->publ.phdr; msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); + msg_set_orignode(msg, tipc_own_addr); + msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, importance); msg_set_size(msg, DIR_MSG_H_SIZE + dsz); if (skb_cow(buf, DIR_MSG_H_SIZE)) return -ENOMEM; @@ -1445,20 +1379,3 @@ static int tipc_forward_buf2port(u32 ref, return -ELINKCONG; } -/** - * tipc_send_buf2port - send message buffer to port identity - */ - -int tipc_send_buf2port(u32 ref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward_buf2port(ref, dest, buf, dsz, &orig, - TIPC_PORT_IMPORTANCE); -} - diff --git a/net/tipc/port.h b/net/tipc/port.h index 73bbf442b346..3a807fcec2be 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -37,13 +37,44 @@ #ifndef _TIPC_PORT_H #define _TIPC_PORT_H -#include "core.h" #include "ref.h" #include "net.h" #include "msg.h" -#include "dbg.h" #include "node_subscr.h" +#define TIPC_FLOW_CONTROL_WIN 512 + +typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_portid const *attmpt_destid); + +typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_name_seq const *attmpt_dest); + +typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason); + +typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *origin); + +typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *orig, + struct tipc_name_seq const *dest); + +typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size); + +typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); + /** * struct user_port - TIPC user port (used with native API) * @user_ref: id of user who created user port @@ -68,6 +99,34 @@ struct user_port { }; /** + * struct tipc_port - TIPC port info available to socket API + * @usr_handle: pointer to additional user-defined information about port + * @lock: pointer to spinlock for controlling access to port + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @conn_unacked: number of unacknowledged messages received from peer port + * @published: non-zero if port has one or more associated names + * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + */ +struct tipc_port { + void *usr_handle; + spinlock_t *lock; + int connected; + u32 conn_type; + u32 conn_instance; + u32 conn_unacked; + int published; + u32 congested; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; +}; + +/** * struct port - TIPC port structure * @publ: TIPC port info available to privileged users * @port_list: adjacent ports in TIPC's global list of ports @@ -109,11 +168,76 @@ struct port { extern spinlock_t tipc_port_list_lock; struct port_list; +/* + * TIPC port manipulation routines + */ +struct tipc_port *tipc_createport_raw(void *usr_handle, + u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), + void (*wakeup)(struct tipc_port *), const u32 importance); + +int tipc_reject_msg(struct sk_buff *buf, u32 err); + +int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); + +void tipc_acknowledge(u32 port_ref, u32 ack); + +int tipc_createport(unsigned int tipc_user, void *usr_handle, + unsigned int importance, tipc_msg_err_event error_cb, + tipc_named_msg_err_event named_error_cb, + tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, + tipc_named_msg_event named_msg_cb, + tipc_conn_msg_event conn_msg_cb, + tipc_continue_event continue_event_cb, u32 *portref); + +int tipc_deleteport(u32 portref); + +int tipc_portimportance(u32 portref, unsigned int *importance); +int tipc_set_portimportance(u32 portref, unsigned int importance); + +int tipc_portunreliable(u32 portref, unsigned int *isunreliable); +int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); + +int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); +int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); + +int tipc_publish(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); +int tipc_withdraw(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); + +int tipc_connect2port(u32 portref, struct tipc_portid const *port); + +int tipc_disconnect(u32 portref); + +int tipc_shutdown(u32 ref); + + +/* + * The following routines require that the port be locked on entry + */ +int tipc_disconnect_port(struct tipc_port *tp_ptr); + +/* + * TIPC messaging routines + */ +int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2port(u32 portref, struct tipc_portid const *dest, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, + struct sk_buff *buf, unsigned int dsz); + +int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, + unsigned int section_count, struct iovec const *msg); + int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); struct sk_buff *tipc_port_get_ports(void); -struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space); void tipc_port_recv_proto_msg(struct sk_buff *buf); void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp); void tipc_port_reinit(void); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e9f0d5004483..cd0bb77f2673 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -49,10 +49,9 @@ #include <linux/tipc.h> #include <linux/tipc_config.h> -#include <net/tipc/tipc_msg.h> -#include <net/tipc/tipc_port.h> #include "core.h" +#include "port.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ @@ -404,7 +403,8 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsock->peer_name.ref; addr->addr.id.node = tsock->peer_name.node; } else { - tipc_ownidentity(tsock->p->ref, &addr->addr.id); + addr->addr.id.ref = tsock->p->ref; + addr->addr.id.node = tipc_own_addr; } *uaddr_len = sizeof(*addr); @@ -597,7 +597,6 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; res = tipc_multicast(tport->ref, &dest->addr.nameseq, - 0, m->msg_iovlen, m->msg_iov); } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 33313961d010..23f43d03980c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -35,10 +35,8 @@ */ #include "core.h" -#include "dbg.h" #include "name_table.h" -#include "port.h" -#include "ref.h" +#include "user_reg.h" #include "subscr.h" /** @@ -544,14 +542,14 @@ static void subscr_named_msg_event(void *usr_handle, int tipc_subscr_start(void) { struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res = -1; + int res; memset(&topsrv, 0, sizeof (topsrv)); spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); spin_lock_bh(&topsrv.lock); - res = tipc_attach(&topsrv.user_ref, NULL, NULL); + res = tipc_attach(&topsrv.user_ref); if (res) { spin_unlock_bh(&topsrv.lock); return res; diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c index 506928803162..2e2702e2049c 100644 --- a/net/tipc/user_reg.c +++ b/net/tipc/user_reg.c @@ -50,15 +50,11 @@ /** * struct tipc_user - registered TIPC user info * @next: index of next free registry entry (or -1 for an allocated entry) - * @callback: ptr to routine to call when TIPC mode changes (NULL if none) - * @usr_handle: user-defined value passed to callback routine * @ports: list of user ports owned by the user */ struct tipc_user { int next; - tipc_mode_event callback; - void *usr_handle; struct list_head ports; }; @@ -95,41 +91,12 @@ static int reg_init(void) } /** - * reg_callback - inform TIPC user about current operating mode - */ - -static void reg_callback(struct tipc_user *user_ptr) -{ - tipc_mode_event cb; - void *arg; - - spin_lock_bh(®_lock); - cb = user_ptr->callback; - arg = user_ptr->usr_handle; - spin_unlock_bh(®_lock); - - if (cb) - cb(arg, tipc_mode, tipc_own_addr); -} - -/** * tipc_reg_start - activate TIPC user registry */ int tipc_reg_start(void) { - u32 u; - int res; - - if ((res = reg_init())) - return res; - - for (u = 1; u <= MAX_USERID; u++) { - if (users[u].callback) - tipc_k_signal((Handler)reg_callback, - (unsigned long)&users[u]); - } - return 0; + return reg_init(); } /** @@ -138,15 +105,9 @@ int tipc_reg_start(void) void tipc_reg_stop(void) { - int id; - if (!users) return; - for (id = 1; id <= MAX_USERID; id++) { - if (users[id].callback) - reg_callback(&users[id]); - } kfree(users); users = NULL; } @@ -157,12 +118,10 @@ void tipc_reg_stop(void) * NOTE: This routine may be called when TIPC is inactive. */ -int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) +int tipc_attach(u32 *userid) { struct tipc_user *user_ptr; - if ((tipc_mode == TIPC_NOT_RUNNING) && !cb) - return -ENOPROTOOPT; if (!users) reg_init(); @@ -177,13 +136,9 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) user_ptr->next = -1; spin_unlock_bh(®_lock); - user_ptr->callback = cb; - user_ptr->usr_handle = usr_handle; INIT_LIST_HEAD(&user_ptr->ports); atomic_inc(&tipc_user_count); - if (cb && (tipc_mode != TIPC_NOT_RUNNING)) - tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr); return 0; } @@ -207,7 +162,6 @@ void tipc_detach(u32 userid) } user_ptr = &users[userid]; - user_ptr->callback = NULL; INIT_LIST_HEAD(&ports_temp); list_splice(&user_ptr->ports, &ports_temp); user_ptr->next = next_free_user; diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h index 81dc12e2882f..109eed0d6de3 100644 --- a/net/tipc/user_reg.h +++ b/net/tipc/user_reg.h @@ -42,6 +42,9 @@ int tipc_reg_start(void); void tipc_reg_stop(void); +int tipc_attach(unsigned int *userref); +void tipc_detach(unsigned int userref); + int tipc_reg_add_port(struct user_port *up_ptr); int tipc_reg_remove_port(struct user_port *up_ptr); diff --git a/net/tipc/zone.c b/net/tipc/zone.c index 83f8b5e91fc8..1b61ca8c48ef 100644 --- a/net/tipc/zone.c +++ b/net/tipc/zone.c @@ -36,9 +36,6 @@ #include "core.h" #include "zone.h" -#include "net.h" -#include "addr.h" -#include "node_subscr.h" #include "cluster.h" #include "node.h" diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2268e6798124..417d7a6c36cf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,7 +316,8 @@ static void unix_write_space(struct sock *sk) if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); if (wq_has_sleeper(wq)) - wake_up_interruptible_sync(&wq->wait); + wake_up_interruptible_sync_poll(&wq->wait, + POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); @@ -1736,7 +1737,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_unlock; } - wake_up_interruptible_sync(&u->peer_wait); + wake_up_interruptible_sync_poll(&u->peer_wait, + POLLOUT | POLLWRNORM | POLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); @@ -2099,13 +2101,12 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP; + mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) + if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2117,20 +2118,19 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; } - /* writable? */ - writable = unix_writable(sk); - if (writable) { - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, - wait); - if (unix_recvq_full(other)) - writable = 0; - } + /* No write status requested, avoid expensive OUT tests. */ + if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT))) + return mask; - sock_put(other); + writable = unix_writable(sk); + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); + if (unix_recvq_full(other)) + writable = 0; } + sock_put(other); } if (writable) diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile index 9f188ab3dcd0..4da14bc48078 100644 --- a/net/wanrouter/Makefile +++ b/net/wanrouter/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_WAN_ROUTER) += wanrouter.o -wanrouter-objs := wanproc.o wanmain.o +wanrouter-y := wanproc.o wanmain.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 9c21ebf9780e..630bcf0a2f04 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -4,6 +4,8 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/if.h> #include <linux/module.h> #include <linux/err.h> @@ -216,8 +218,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname)) - printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", - newname); + pr_err("failed to rename debugfs dir to %s!\n", newname); nl80211_notify_dev_rename(rdev); @@ -699,8 +700,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { - printk(KERN_ERR "wireless: failed to add phy80211 " - "symlink to netdev!\n"); + pr_err("failed to add phy80211 symlink to netdev!\n"); } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index 97d411f74507..3268fac5ab22 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -13,6 +13,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/ctype.h> #include <linux/ieee80211.h> @@ -224,8 +226,8 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops) return -EINVAL; found: - printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); + printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n", + ops->name); list_del(&alg->list); spin_unlock_irqrestore(&lib80211_crypto_lock, flags); kfree(alg); @@ -270,7 +272,7 @@ static struct lib80211_crypto_ops lib80211_crypt_null = { static int __init lib80211_init(void) { - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + pr_info(DRV_DESCRIPTION "\n"); return lib80211_register_crypto_ops(&lib80211_crypt_null); } diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 0fe40510e2cb..7ea4f2b0770e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -10,6 +10,8 @@ * more details. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> @@ -99,8 +101,7 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->tx_tfm_arc4 = NULL; goto fail; } @@ -108,8 +109,7 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->tx_tfm_michael = NULL; goto fail; } @@ -117,8 +117,7 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->rx_tfm_arc4 = NULL; goto fail; } @@ -126,8 +125,7 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); + printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->rx_tfm_michael = NULL; goto fail; } @@ -536,7 +534,7 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, struct scatterlist sg[2]; if (tfm_michael == NULL) { - printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); + pr_warn("%s(): tfm_michael == NULL\n", __func__); return -1; } sg_init_table(sg, 2); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 26838d903b9a..6980a0c315b2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); } EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4e78e3f26798..67ff7e92cb99 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -166,7 +166,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, + + [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, + + [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -526,7 +532,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.rts_threshold); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, dev->wiphy.coverage_class); - NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -545,6 +550,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + if (dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); + if (!res) { + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant); + } + } + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -1024,6 +1039,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && + info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { + u32 tx_ant, rx_ant; + if (!rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto bad_res; + } + + tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); + rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { @@ -3569,6 +3600,34 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change); } +static bool +nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev, + int mcast_rate[IEEE80211_NUM_BANDS], + int rateval) +{ + struct wiphy *wiphy = &rdev->wiphy; + bool found = false; + int band, i; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = wiphy->bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == rateval) { + mcast_rate[band] = i + 1; + found = true; + break; + } + } + } + + return found; +} + static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3653,6 +3712,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_MCAST_RATE] && + !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate, + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) + return -EINVAL; + if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, info->attrs[NL80211_ATTR_KEYS]); @@ -5651,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 30d2f939150d..16c2f7190768 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4b9f8912526c..5ed615f94e0c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -32,6 +32,9 @@ * rely on some SHA1 checksum of the regdomain for example. * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> @@ -48,7 +51,7 @@ #ifdef CONFIG_CFG80211_REG_DEBUG #define REG_DBG_PRINT(format, args...) \ do { \ - printk(KERN_DEBUG format , ## args); \ + printk(KERN_DEBUG pr_fmt(format), ##args); \ } while (0) #else #define REG_DBG_PRINT(args...) @@ -96,6 +99,9 @@ struct reg_beacon { struct ieee80211_channel chan; }; +static void reg_todo(struct work_struct *work); +static DECLARE_WORK(reg_work, reg_todo); + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 5, @@ -367,11 +373,10 @@ static int call_crda(const char *alpha2) }; if (!is_world_regdom((char *) alpha2)) - printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", + pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else - printk(KERN_INFO "cfg80211: Calling CRDA to update world " - "regulatory domain\n"); + pr_info("Calling CRDA to update world regulatory domain\n"); /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); @@ -711,6 +716,60 @@ int freq_reg_info(struct wiphy *wiphy, } EXPORT_SYMBOL(freq_reg_info); +#ifdef CONFIG_CFG80211_REG_DEBUG +static const char *reg_initiator_name(enum nl80211_reg_initiator initiator) +{ + switch (initiator) { + case NL80211_REGDOM_SET_BY_CORE: + return "Set by core"; + case NL80211_REGDOM_SET_BY_USER: + return "Set by user"; + case NL80211_REGDOM_SET_BY_DRIVER: + return "Set by driver"; + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + return "Set by country IE"; + default: + WARN_ON(1); + return "Set by bug"; + } +} + +static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, + u32 desired_bw_khz, + const struct ieee80211_reg_rule *reg_rule) +{ + const struct ieee80211_power_rule *power_rule; + const struct ieee80211_freq_range *freq_range; + char max_antenna_gain[32]; + + power_rule = ®_rule->power_rule; + freq_range = ®_rule->freq_range; + + if (!power_rule->max_antenna_gain) + snprintf(max_antenna_gain, 32, "N/A"); + else + snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain); + + REG_DBG_PRINT("Updating information on frequency %d MHz " + "for %d a MHz width channel with regulatory rule:\n", + chan->center_freq, + KHZ_TO_MHZ(desired_bw_khz)); + + REG_DBG_PRINT("%d KHz - %d KHz @ KHz), (%s mBi, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + max_antenna_gain, + power_rule->max_eirp); +} +#else +static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, + u32 desired_bw_khz, + const struct ieee80211_reg_rule *reg_rule) +{ + return; +} +#endif + /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz @@ -720,7 +779,9 @@ EXPORT_SYMBOL(freq_reg_info); * on the wiphy with the target_bw specified. Then we can simply use * that below for the desired_bw_khz below. */ -static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, +static void handle_channel(struct wiphy *wiphy, + enum nl80211_reg_initiator initiator, + enum ieee80211_band band, unsigned int chan_idx) { int r; @@ -748,8 +809,27 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, desired_bw_khz, ®_rule); - if (r) + if (r) { + /* + * We will disable all channels that do not match our + * recieved regulatory rule unless the hint is coming + * from a Country IE and the Country IE had no information + * about a band. The IEEE 802.11 spec allows for an AP + * to send only a subset of the regulatory rules allowed, + * so an AP in the US that only supports 2.4 GHz may only send + * a country IE with information for the 2.4 GHz band + * while 5 GHz is still supported. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + r == -ERANGE) + return; + + REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); + chan->flags = IEEE80211_CHAN_DISABLED; return; + } + + chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -784,7 +864,9 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) +static void handle_band(struct wiphy *wiphy, + enum ieee80211_band band, + enum nl80211_reg_initiator initiator) { unsigned int i; struct ieee80211_supported_band *sband; @@ -793,24 +875,42 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) sband = wiphy->bands[band]; for (i = 0; i < sband->n_channels; i++) - handle_channel(wiphy, band, i); + handle_channel(wiphy, initiator, band, i); } static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { - if (!last_request) + if (!last_request) { + REG_DBG_PRINT("Ignoring regulatory request %s since " + "last_request is not set\n", + reg_initiator_name(initiator)); return true; + } + if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + REG_DBG_PRINT("Ignoring regulatory request %s " + "since the driver uses its own custom " + "regulatory domain ", + reg_initiator_name(initiator)); return true; + } + /* * wiphy->regd will be set once the device has its own * desired regulatory domain set */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && - !is_world_regdom(last_request->alpha2)) + initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + !is_world_regdom(last_request->alpha2)) { + REG_DBG_PRINT("Ignoring regulatory request %s " + "since the driver requires its own regulaotry " + "domain to be set first", + reg_initiator_name(initiator)); return true; + } + return false; } @@ -1030,7 +1130,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, goto out; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy, band); + handle_band(wiphy, band, initiator); } out: reg_process_beacons(wiphy); @@ -1066,10 +1166,17 @@ static void handle_channel_custom(struct wiphy *wiphy, regd); if (r) { + REG_DBG_PRINT("Disabling freq %d MHz as custom " + "regd has no rule that fits a %d MHz " + "wide channel\n", + chan->center_freq, + KHZ_TO_MHZ(desired_bw_khz)); chan->flags = IEEE80211_CHAN_DISABLED; return; } + chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule); + power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -1215,6 +1322,21 @@ static int ignore_request(struct wiphy *wiphy, return -EINVAL; } +static void reg_set_request_processed(void) +{ + bool need_more_processing = false; + + last_request->processed = true; + + spin_lock(®_requests_lock); + if (!list_empty(®_requests_list)) + need_more_processing = true; + spin_unlock(®_requests_lock); + + if (need_more_processing) + schedule_work(®_work); +} + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this @@ -1290,8 +1412,10 @@ new_request: * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) + if (r == -EALREADY) { nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + } return r; } @@ -1307,16 +1431,13 @@ static void reg_process_hint(struct regulatory_request *reg_request) BUG_ON(!reg_request->alpha2); - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - if (wiphy_idx_valid(reg_request->wiphy_idx)) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); - goto out; + return; } r = __regulatory_hint(wiphy, reg_request); @@ -1324,28 +1445,46 @@ static void reg_process_hint(struct regulatory_request *reg_request) if (r == -EALREADY && wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) wiphy_update_regulatory(wiphy, initiator); -out: - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } -/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ +/* + * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* + * Regulatory hints come on a first come first serve basis and we + * must process each one atomically. + */ static void reg_process_pending_hints(void) - { +{ struct regulatory_request *reg_request; + mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); + + /* When last_request->processed becomes true this will be rescheduled */ + if (last_request && !last_request->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting " + "for it to be processed..."); + goto out; + } + spin_lock(®_requests_lock); - while (!list_empty(®_requests_list)) { - reg_request = list_first_entry(®_requests_list, - struct regulatory_request, - list); - list_del_init(®_request->list); + if (list_empty(®_requests_list)) { spin_unlock(®_requests_lock); - reg_process_hint(reg_request); - spin_lock(®_requests_lock); + goto out; } + + reg_request = list_first_entry(®_requests_list, + struct regulatory_request, + list); + list_del_init(®_request->list); + spin_unlock(®_requests_lock); + + reg_process_hint(reg_request); + +out: + mutex_unlock(®_mutex); + mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1392,8 +1531,6 @@ static void reg_todo(struct work_struct *work) reg_process_pending_beacon_hints(); } -static DECLARE_WORK(reg_work, reg_todo); - static void queue_regulatory_request(struct regulatory_request *request) { if (isalpha(request->alpha2[0])) @@ -1428,12 +1565,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; - /* - * This ensures last_request is populated once modules - * come swinging in and calling regulatory hints and - * wiphy_apply_custom_regulatory(). - */ - reg_process_hint(request); + queue_regulatory_request(request); return 0; } @@ -1559,7 +1691,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) if (is_user_regdom_saved()) { /* Unless we're asked to ignore it and reset it */ if (reset_user) { - REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + REG_DBG_PRINT("Restoring regulatory settings " "including user preference\n"); user_alpha2[0] = '9'; user_alpha2[1] = '7'; @@ -1570,7 +1702,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) * back as they were for a full restore. */ if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("cfg80211: Keeping preference on " + REG_DBG_PRINT("Keeping preference on " "module parameter ieee80211_regdom: %c%c\n", ieee80211_regdom[0], ieee80211_regdom[1]); @@ -1578,7 +1710,7 @@ static void restore_alpha2(char *alpha2, bool reset_user) alpha2[1] = ieee80211_regdom[1]; } } else { - REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + REG_DBG_PRINT("Restoring regulatory settings " "while preserving user preference for: %c%c\n", user_alpha2[0], user_alpha2[1]); @@ -1586,14 +1718,14 @@ static void restore_alpha2(char *alpha2, bool reset_user) alpha2[1] = user_alpha2[1]; } } else if (!is_world_regdom(ieee80211_regdom)) { - REG_DBG_PRINT("cfg80211: Keeping preference on " + REG_DBG_PRINT("Keeping preference on " "module parameter ieee80211_regdom: %c%c\n", ieee80211_regdom[0], ieee80211_regdom[1]); alpha2[0] = ieee80211_regdom[0]; alpha2[1] = ieee80211_regdom[1]; } else - REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); + REG_DBG_PRINT("Restoring regulatory settings\n"); } /* @@ -1661,7 +1793,7 @@ static void restore_regulatory_settings(bool reset_user) void regulatory_hint_disconnect(void) { - REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " + REG_DBG_PRINT("All devices are disconnected, going to " "restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -1691,7 +1823,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (!reg_beacon) return -ENOMEM; - REG_DBG_PRINT("cfg80211: Found new beacon on " + REG_DBG_PRINT("Found new beacon on " "frequency: %d MHz (Ch %d) on %s\n", beacon_chan->center_freq, ieee80211_frequency_to_channel(beacon_chan->center_freq), @@ -1721,8 +1853,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) const struct ieee80211_freq_range *freq_range = NULL; const struct ieee80211_power_rule *power_rule = NULL; - printk(KERN_INFO " (start_freq - end_freq @ bandwidth), " - "(max_antenna_gain, max_eirp)\n"); + pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n"); for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; @@ -1734,16 +1865,14 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) * in certain regions */ if (power_rule->max_antenna_gain) - printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " - "(%d mBi, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, power_rule->max_antenna_gain, power_rule->max_eirp); else - printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " - "(N/A, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, @@ -1762,27 +1891,20 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) rdev = cfg80211_rdev_by_wiphy_idx( last_request->wiphy_idx); if (rdev) { - printk(KERN_INFO "cfg80211: Current regulatory " - "domain updated by AP to: %c%c\n", + pr_info("Current regulatory domain updated by AP to: %c%c\n", rdev->country_ie_alpha2[0], rdev->country_ie_alpha2[1]); } else - printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected:\n"); + pr_info("Current regulatory domain intersected:\n"); } else - printk(KERN_INFO "cfg80211: Current regulatory " - "domain intersected:\n"); + pr_info("Current regulatory domain intersected:\n"); } else if (is_world_regdom(rd->alpha2)) - printk(KERN_INFO "cfg80211: World regulatory " - "domain updated:\n"); + pr_info("World regulatory domain updated:\n"); else { if (is_unknown_alpha2(rd->alpha2)) - printk(KERN_INFO "cfg80211: Regulatory domain " - "changed to driver built-in settings " - "(unknown country)\n"); + pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); else - printk(KERN_INFO "cfg80211: Regulatory domain " - "changed to country: %c%c\n", + pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } print_rd_rules(rd); @@ -1790,8 +1912,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) static void print_regdomain_info(const struct ieee80211_regdomain *rd) { - printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", - rd->alpha2[0], rd->alpha2[1]); + pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); print_rd_rules(rd); } @@ -1842,8 +1963,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; if (!is_valid_rd(rd)) { - printk(KERN_ERR "cfg80211: Invalid " - "regulatory domain detected:\n"); + pr_err("Invalid regulatory domain detected:\n"); print_regdomain_info(rd); return -EINVAL; } @@ -1959,6 +2079,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + mutex_unlock(®_mutex); return r; @@ -2015,8 +2137,7 @@ int __init regulatory_init(void) * early boot for call_usermodehelper(). For now treat these * errors as non-fatal. */ - printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable " - "to call CRDA during init"); + pr_err("kobject_uevent_env() was unable to call CRDA during init\n"); #ifdef CONFIG_CFG80211_REG_DEBUG /* We want to find out exactly why when debugging */ WARN_ON(err); diff --git a/net/wireless/util.c b/net/wireless/util.c index 76120aeda57d..fee020b15a4e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -502,7 +502,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, skb_orphan(skb); if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) { - printk(KERN_ERR "failed to reallocate Tx buffer\n"); + pr_err("failed to reallocate Tx buffer\n"); return -ENOMEM; } skb->truesize += head_need; @@ -685,20 +685,17 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) continue; if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL, &wdev->connect_keys->params[i])) { - printk(KERN_ERR "%s: failed to set key %d\n", - dev->name, i); + netdev_err(dev, "failed to set key %d\n", i); continue; } if (wdev->connect_keys->def == i) if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) { - printk(KERN_ERR "%s: failed to set defkey %d\n", - dev->name, i); + netdev_err(dev, "failed to set defkey %d\n", i); continue; } if (wdev->connect_keys->defmgmt == i) if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i)) - printk(KERN_ERR "%s: failed to set mgtdef %d\n", - dev->name, i); + netdev_err(dev, "failed to set mgtdef %d\n", i); } kfree(wdev->connect_keys); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index dc675a3daa3d..fdbc23c10d8c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -467,8 +467,8 @@ void wireless_send_event(struct net_device * dev, * The best the driver could do is to log an error message. * We will do it ourselves instead... */ - printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", - dev->name, cmd); + netdev_err(dev, "(WE) : Invalid/Unknown Wireless Event (0x%04X)\n", + cmd); return; } @@ -476,11 +476,13 @@ void wireless_send_event(struct net_device * dev, if (descr->header_type == IW_HEADER_TYPE_POINT) { /* Check if number of token fits within bounds */ if (wrqu->data.length > descr->max_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event too big (%d)\n", + wrqu->data.length); return; } if (wrqu->data.length < descr->min_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); + netdev_err(dev, "(WE) : Wireless Event too small (%d)\n", + wrqu->data.length); return; } /* Calculate extra_len - extra is NULL for restricted events */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f7af98dff409..ad96ee90fe27 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1357,11 +1357,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; int rc; - lock_kernel(); switch (cmd) { case TIOCOUTQ: { - int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + int amount; + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); @@ -1375,8 +1375,10 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) * These two are safe on a single CPU system as * only user tasks fiddle here */ + lock_sock(sk); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + release_sock(sk); rc = put_user(amount, (unsigned int __user *)argp); break; } @@ -1422,9 +1424,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = x25_subscr_ioctl(cmd, argp); break; case SIOCX25GFACILITIES: { - struct x25_facilities fac = x25->facilities; - rc = copy_to_user(argp, &fac, - sizeof(fac)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->facilities, + sizeof(x25->facilities)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1435,18 +1439,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) sizeof(facilities))) break; rc = -EINVAL; + lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) - break; + goto out_fac_release; if (facilities.pacsize_in < X25_PS16 || facilities.pacsize_in > X25_PS4096) - break; + goto out_fac_release; if (facilities.pacsize_out < X25_PS16 || facilities.pacsize_out > X25_PS4096) - break; + goto out_fac_release; if (facilities.winsize_in < 1 || facilities.winsize_in > 127) - break; + goto out_fac_release; if (facilities.throughput) { int out = facilities.throughput & 0xf0; int in = facilities.throughput & 0x0f; @@ -1454,24 +1459,28 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) facilities.throughput |= X25_DEFAULT_THROUGHPUT << 4; else if (out < 0x30 || out > 0xD0) - break; + goto out_fac_release; if (!in) facilities.throughput |= X25_DEFAULT_THROUGHPUT; else if (in < 0x03 || in > 0x0D) - break; + goto out_fac_release; } if (facilities.reverse && (facilities.reverse & 0x81) != 0x81) - break; + goto out_fac_release; x25->facilities = facilities; rc = 0; +out_fac_release: + release_sock(sk); break; } case SIOCX25GDTEFACILITIES: { + lock_sock(sk); rc = copy_to_user(argp, &x25->dte_facilities, sizeof(x25->dte_facilities)); + release_sock(sk); if (rc) rc = -EFAULT; break; @@ -1483,26 +1492,31 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) break; rc = -EINVAL; + lock_sock(sk); if (sk->sk_state != TCP_LISTEN && sk->sk_state != TCP_CLOSE) - break; + goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) - break; + goto out_dtefac_release; if (dtefacs.calling_ae == NULL) - break; + goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) - break; + goto out_dtefac_release; if (dtefacs.called_ae == NULL) - break; + goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; +out_dtefac_release: + release_sock(sk); break; } case SIOCX25GCALLUSERDATA: { - struct x25_calluserdata cud = x25->calluserdata; - rc = copy_to_user(argp, &cud, - sizeof(cud)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->calluserdata, + sizeof(x25->calluserdata)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1516,16 +1530,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EINVAL; if (calluserdata.cudlength > X25_MAX_CUD_LEN) break; + lock_sock(sk); x25->calluserdata = calluserdata; + release_sock(sk); rc = 0; break; } case SIOCX25GCAUSEDIAG: { - struct x25_causediag causediag; - causediag = x25->causediag; - rc = copy_to_user(argp, &causediag, - sizeof(causediag)) ? -EFAULT : 0; + lock_sock(sk); + rc = copy_to_user(argp, &x25->causediag, + sizeof(x25->causediag)) + ? -EFAULT : 0; + release_sock(sk); break; } @@ -1534,7 +1551,9 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EFAULT; if (copy_from_user(&causediag, argp, sizeof(causediag))) break; + lock_sock(sk); x25->causediag = causediag; + release_sock(sk); rc = 0; break; @@ -1543,31 +1562,37 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCX25SCUDMATCHLEN: { struct x25_subaddr sub_addr; rc = -EINVAL; + lock_sock(sk); if(sk->sk_state != TCP_CLOSE) - break; + goto out_cud_release; rc = -EFAULT; if (copy_from_user(&sub_addr, argp, sizeof(sub_addr))) - break; + goto out_cud_release; rc = -EINVAL; if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) - break; + goto out_cud_release; x25->cudmatchlength = sub_addr.cudmatchlength; rc = 0; +out_cud_release: + release_sock(sk); break; } case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; + lock_kernel(); if (sk->sk_state != TCP_CLOSE) break; clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + unlock_kernel(); rc = 0; break; } case SIOCX25SENDCALLACCPT: { rc = -EINVAL; + lock_kernel(); if (sk->sk_state != TCP_ESTABLISHED) break; /* must call accptapprv above */ @@ -1575,6 +1600,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; + unlock_kernel(); rc = 0; break; } @@ -1583,7 +1609,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } - unlock_kernel(); return rc; } @@ -1619,16 +1644,20 @@ static int compat_x25_subscr_ioctl(unsigned int cmd, dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(x25_subscr32, &x25_subscr, sizeof(*x25_subscr32)) ? -EFAULT : 0; } else { rc = -EINVAL; if (x25_subscr.extended == 0 || x25_subscr.extended == 1) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); @@ -1654,19 +1683,15 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, break; case SIOCGSTAMP: rc = -EINVAL; - lock_kernel(); if (sk) rc = compat_sock_get_timestamp(sk, (struct timeval __user*)argp); - unlock_kernel(); break; case SIOCGSTAMPNS: rc = -EINVAL; - lock_kernel(); if (sk) rc = compat_sock_get_timestampns(sk, (struct timespec __user*)argp); - unlock_kernel(); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -1685,22 +1710,16 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = x25_route_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GSUBSCRIP: - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GFACILITIES: case SIOCX25SFACILITIES: diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index b25c6463c3e9..4cbc942f762a 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -31,8 +31,8 @@ #include <linux/init.h> #include <net/x25.h> -static LIST_HEAD(x25_neigh_list); -static DEFINE_RWLOCK(x25_neigh_list_lock); +LIST_HEAD(x25_neigh_list); +DEFINE_RWLOCK(x25_neigh_list_lock); static void x25_t20timer_expiry(unsigned long); @@ -360,16 +360,20 @@ int x25_subscr_ioctl(unsigned int cmd, void __user *arg) dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(arg, &x25_subscr, sizeof(x25_subscr)) ? -EFAULT : 0; } else { rc = -EINVAL; if (!(x25_subscr.extended && x25_subscr.extended != 1)) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); |