diff options
Diffstat (limited to 'net')
65 files changed, 1157 insertions, 1123 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6cabf6d8a751..42233df2b099 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1088,8 +1088,8 @@ out: /* * FIXME: nonblock behaviour looks like it may have a bug. */ -static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +static int __must_check ax25_connect(struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; ax25_cb *ax25 = ax25_sk(sk), *ax25t; diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index 21a0616152fc..97a49c79c605 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL(ax2asc); */ void asc2ax(ax25_address *addr, const char *callsign) { - char *s; + const char *s; int n; for (s = callsign, n = 0; n < 6; n++) { diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 07ac0207eb69..aff3e652c2d1 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -29,17 +29,10 @@ #include <linux/mm.h> #include <linux/interrupt.h> -static struct protocol_struct { - struct protocol_struct *next; - unsigned int pid; - int (*func)(struct sk_buff *, ax25_cb *); -} *protocol_list = NULL; +static struct ax25_protocol *protocol_list; static DEFINE_RWLOCK(protocol_list_lock); -static struct linkfail_struct { - struct linkfail_struct *next; - void (*func)(ax25_cb *, int); -} *linkfail_list = NULL; +static HLIST_HEAD(ax25_linkfail_list); static DEFINE_SPINLOCK(linkfail_lock); static struct listen_struct { @@ -49,36 +42,23 @@ static struct listen_struct { } *listen_list = NULL; static DEFINE_SPINLOCK(listen_lock); -int ax25_protocol_register(unsigned int pid, - int (*func)(struct sk_buff *, ax25_cb *)) +/* + * Do not register the internal protocols AX25_P_TEXT, AX25_P_SEGMENT, + * AX25_P_IP or AX25_P_ARP ... + */ +void ax25_register_pid(struct ax25_protocol *ap) { - struct protocol_struct *protocol; - - if (pid == AX25_P_TEXT || pid == AX25_P_SEGMENT) - return 0; -#ifdef CONFIG_INET - if (pid == AX25_P_IP || pid == AX25_P_ARP) - return 0; -#endif - if ((protocol = kmalloc(sizeof(*protocol), GFP_ATOMIC)) == NULL) - return 0; - - protocol->pid = pid; - protocol->func = func; - write_lock_bh(&protocol_list_lock); - protocol->next = protocol_list; - protocol_list = protocol; + ap->next = protocol_list; + protocol_list = ap; write_unlock_bh(&protocol_list_lock); - - return 1; } -EXPORT_SYMBOL(ax25_protocol_register); +EXPORT_SYMBOL_GPL(ax25_register_pid); void ax25_protocol_release(unsigned int pid) { - struct protocol_struct *s, *protocol; + struct ax25_protocol *s, *protocol; write_lock_bh(&protocol_list_lock); protocol = protocol_list; @@ -110,54 +90,19 @@ void ax25_protocol_release(unsigned int pid) EXPORT_SYMBOL(ax25_protocol_release); -int ax25_linkfail_register(void (*func)(ax25_cb *, int)) +void ax25_linkfail_register(struct ax25_linkfail *lf) { - struct linkfail_struct *linkfail; - - if ((linkfail = kmalloc(sizeof(*linkfail), GFP_ATOMIC)) == NULL) - return 0; - - linkfail->func = func; - spin_lock_bh(&linkfail_lock); - linkfail->next = linkfail_list; - linkfail_list = linkfail; + hlist_add_head(&lf->lf_node, &ax25_linkfail_list); spin_unlock_bh(&linkfail_lock); - - return 1; } EXPORT_SYMBOL(ax25_linkfail_register); -void ax25_linkfail_release(void (*func)(ax25_cb *, int)) +void ax25_linkfail_release(struct ax25_linkfail *lf) { - struct linkfail_struct *s, *linkfail; - spin_lock_bh(&linkfail_lock); - linkfail = linkfail_list; - if (linkfail == NULL) { - spin_unlock_bh(&linkfail_lock); - return; - } - - if (linkfail->func == func) { - linkfail_list = linkfail->next; - spin_unlock_bh(&linkfail_lock); - kfree(linkfail); - return; - } - - while (linkfail != NULL && linkfail->next != NULL) { - if (linkfail->next->func == func) { - s = linkfail->next; - linkfail->next = linkfail->next->next; - spin_unlock_bh(&linkfail_lock); - kfree(s); - return; - } - - linkfail = linkfail->next; - } + hlist_del_init(&lf->lf_node); spin_unlock_bh(&linkfail_lock); } @@ -171,7 +116,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) return 0; if ((listen = kmalloc(sizeof(*listen), GFP_ATOMIC)) == NULL) - return 0; + return -ENOMEM; listen->callsign = *callsign; listen->dev = dev; @@ -181,7 +126,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) listen_list = listen; spin_unlock_bh(&listen_lock); - return 1; + return 0; } EXPORT_SYMBOL(ax25_listen_register); @@ -223,7 +168,7 @@ EXPORT_SYMBOL(ax25_listen_release); int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) { int (*res)(struct sk_buff *, ax25_cb *) = NULL; - struct protocol_struct *protocol; + struct ax25_protocol *protocol; read_lock(&protocol_list_lock); for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) @@ -242,7 +187,8 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) spin_lock_bh(&listen_lock); for (listen = listen_list; listen != NULL; listen = listen->next) - if (ax25cmp(&listen->callsign, callsign) == 0 && (listen->dev == dev || listen->dev == NULL)) { + if (ax25cmp(&listen->callsign, callsign) == 0 && + (listen->dev == dev || listen->dev == NULL)) { spin_unlock_bh(&listen_lock); return 1; } @@ -253,17 +199,18 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { - struct linkfail_struct *linkfail; + struct ax25_linkfail *lf; + struct hlist_node *node; spin_lock_bh(&linkfail_lock); - for (linkfail = linkfail_list; linkfail != NULL; linkfail = linkfail->next) - (linkfail->func)(ax25, reason); + hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } int ax25_protocol_is_registered(unsigned int pid) { - struct protocol_struct *protocol; + struct ax25_protocol *protocol; int res = 0; read_lock_bh(&protocol_list_lock); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 8580356ace5c..0a0381622b1c 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -71,7 +71,7 @@ void ax25_rt_device_down(struct net_device *dev) write_unlock(&ax25_route_lock); } -static int ax25_rt_add(struct ax25_routes_struct *route) +static int __must_check ax25_rt_add(struct ax25_routes_struct *route) { ax25_route *ax25_rt; ax25_dev *ax25_dev; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 711a085eca5b..dbf98c49dbaa 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -123,10 +123,10 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE && flt->opcode != - get_unaligned((__u16 *)(skb->data + 3))) || + get_unaligned((__le16 *)(skb->data + 3))) || (evt == HCI_EV_CMD_STATUS && flt->opcode != - get_unaligned((__u16 *)(skb->data + 4))))) + get_unaligned((__le16 *)(skb->data + 4))))) continue; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bd221ad52eaf..ea3337ad0edc 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -61,9 +61,6 @@ static int brnf_filter_vlan_tagged __read_mostly = 1; #define brnf_filter_vlan_tagged 1 #endif -int brnf_deferred_hooks; -EXPORT_SYMBOL_GPL(brnf_deferred_hooks); - static __be16 inline vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -685,110 +682,50 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT ***********************************************/ -static int br_nf_local_out_finish(struct sk_buff *skb) -{ - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish, NF_BR_PRI_FIRST + 1); - - return 0; -} - -/* This function sees both locally originated IP packets and forwarded +/* PF_BRIDGE/LOCAL_OUT *********************************************** + * + * This function sees both locally originated IP packets and forwarded * IP packets (in both cases the destination device is a bridge * device). It also sees bridged-and-DNAT'ed packets. - * To be able to filter on the physical bridge devices (with the physdev - * module), we steal packets destined to a bridge device away from the - * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, - * when we have determined the real output device. This is done in here. * * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. - * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched - * this packet before, and so the packet was locally originated. We fake - * the PF_INET/LOCAL_OUT hook. - * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, - * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure - * even routed packets that didn't arrive on a bridge interface have their - * nf_bridge->physindev set. */ + */ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct net_device *realindev, *realoutdev; + struct net_device *realindev; struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; - int pf; if (!skb->nf_bridge) return NF_ACCEPT; - if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) - pf = PF_INET; - else - pf = PF_INET6; - nf_bridge = skb->nf_bridge; - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; + if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) + return NF_ACCEPT; /* Bridged, take PF_BRIDGE/FORWARD. * (see big note in front of br_nf_pre_routing_finish) */ - if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, - skb->dev, br_forward_finish); - goto out; + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; } - realoutdev = bridge_parent(skb->dev); - if (!realoutdev) - return NF_DROP; - -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* iptables should match -o br0.x */ - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; - } - /* IP forwarded traffic has a physindev, locally - * generated traffic hasn't. */ - if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) { - struct net_device *parent = bridge_parent(realindev); - if (parent) - realindev = parent; - } - - NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); - } else { - NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; } -out: + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, + br_forward_finish); return NF_STOLEN; } @@ -894,69 +831,6 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, return NF_ACCEPT; } -/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT - * and PF_INET(6)/POST_ROUTING until we have done the forwarding - * decision in the bridge code and have determined nf_bridge->physoutdev. */ -static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *skb = *pskb; - - if ((out->hard_start_xmit == br_dev_xmit && - okfn != br_nf_forward_finish && - okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit) -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - || ((out->priv_flags & IFF_802_1Q_VLAN) && - VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) -#endif - ) { - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) { -#ifdef CONFIG_SYSCTL - /* This code is executed while in the IP(v6) stack, - the version should be 4 or 6. We can't use - skb->protocol because that isn't set on - PF_INET(6)/LOCAL_OUT. */ - struct iphdr *ip = skb->nh.iph; - - if (ip->version == 4 && !brnf_call_iptables) - return NF_ACCEPT; - else if (ip->version == 6 && !brnf_call_ip6tables) - return NF_ACCEPT; - else if (!brnf_deferred_hooks) - return NF_ACCEPT; -#endif - if (hook == NF_IP_POST_ROUTING) - return NF_ACCEPT; - if (!nf_bridge_alloc(skb)) - return NF_DROP; - } - - nf_bridge = skb->nf_bridge; - - /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we - * will need the indev then. For a brouter, the real indev - * can be a bridge port, so we make sure br_nf_local_out() - * doesn't use the bridge parent of the indev by using - * the BRNF_DONT_TAKE_PARENT mask. */ - if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { - nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; - nf_bridge->physindev = (struct net_device *)in; - } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* the iptables outdev is br0.x, not br0 */ - if (out->priv_flags & IFF_802_1Q_VLAN) - nf_bridge->netoutdev = (struct net_device *)out; -#endif - return NF_STOP; - } - - return NF_ACCEPT; -} - /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because @@ -1002,36 +876,6 @@ static struct nf_hook_ops br_nf_ops[] = { .pf = PF_INET6, .hooknum = NF_IP6_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, - .priority = NF_IP6_PRI_FIRST, }, }; #ifdef CONFIG_SYSCTL diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8a271285f2f3..823215d8e90f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -55,6 +55,7 @@ static void queue_process(struct work_struct *work) struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; + unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; @@ -64,15 +65,19 @@ static void queue_process(struct work_struct *work) continue; } - netif_tx_lock_bh(dev); + local_irq_save(flags); + netif_tx_lock(dev); if (netif_queue_stopped(dev) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock_bh(dev); + netif_tx_unlock(dev); + local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } + netif_tx_unlock(dev); + local_irq_restore(flags); } } @@ -242,22 +247,28 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id() && - netif_tx_trylock(dev)) { - /* try until next clock tick */ - for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (!netif_queue_stopped(dev)) - status = dev->hard_start_xmit(skb, dev); + npinfo->poll_owner != smp_processor_id()) { + unsigned long flags; - if (status == NETDEV_TX_OK) - break; + local_irq_save(flags); + if (netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - /* tickle device maybe there is some cleanup */ - netpoll_poll(np); + if (status == NETDEV_TX_OK) + break; + + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - udelay(USEC_PER_POLL); + udelay(USEC_PER_POLL); + } + netif_tx_unlock(dev); } - netif_tx_unlock(dev); + local_irq_restore(flags); } if (status != NETDEV_TX_OK) { diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1f4727ddbdbf..a086c6312d3b 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -223,7 +223,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, gap = -new_head; } new_head += DCCP_MAX_ACKVEC_LEN; - } + } av->dccpav_buf_head = new_head; @@ -336,7 +336,7 @@ out_duplicate: void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); + (unsigned long long)ackno); while (len--) { const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index bcc2d12ae81c..c65cb2453e43 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -43,8 +43,6 @@ struct ccid_operations { unsigned char* value); int (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); - int (*ccid_hc_tx_insert_options)(struct sock *sk, - struct sk_buff *skb); void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, @@ -146,14 +144,6 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, return rc; } -static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL) - return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb); - return 0; -} - static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 2555be8f4790..fd38b05d6f79 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -351,7 +351,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) while (seqp != hctx->ccid2hctx_seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", - (unsigned long long)seqp->ccid2s_seq, + (unsigned long long)seqp->ccid2s_seq, seqp->ccid2s_acked, seqp->ccid2s_sent); seqp = seqp->ccid2s_next; } @@ -473,7 +473,7 @@ static inline void ccid2_new_ack(struct sock *sk, /* first measurement */ if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", - r, jiffies, + r, jiffies, (unsigned long long)seqp->ccid2s_seq); ccid2_change_srtt(hctx, r); hctx->ccid2hctx_rttvar = r >> 1; @@ -518,8 +518,8 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_lastrtt = jiffies; ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, - hctx->ccid2hctx_rto, HZ, r); + hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, + hctx->ccid2hctx_rto, HZ, r); hctx->ccid2hctx_sent = 0; } @@ -667,9 +667,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == + if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - ccid2_congestion_event(hctx, + ccid2_congestion_event(hctx, seqp); } else ccid2_new_ack(sk, seqp, diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 66a27b9688ca..40402c59506a 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -41,27 +41,6 @@ #include "lib/tfrc.h" #include "ccid3.h" -/* - * Reason for maths here is to avoid 32 bit overflow when a is big. - * With this we get close to the limit. - */ -static u32 usecs_div(const u32 a, const u32 b) -{ - const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : - a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : - a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : - a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : - a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : - a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : - a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : - a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : - 100000; - const u32 tmp = a * (USEC_PER_SEC / div); - return (b >= 2 * div) ? tmp / (b / div) : tmp; -} - - - #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static int ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) @@ -108,8 +87,9 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) { timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); - /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ - hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); + /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ + hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x >> 6); /* Update nominal send time with regard to the new t_ipi */ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); @@ -128,40 +108,44 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) * X = max(min(2 * X, 2 * X_recv), s / R); * tld = now; * + * Note: X and X_recv are both stored in units of 64 * bytes/second, to support + * fine-grained resolution of sending rates. This requires scaling by 2^6 + * throughout the code. Only X_calc is unscaled (in bytes/second). + * * If X has changed, we also update the scheduled send time t_now, * the inter-packet interval t_ipi, and the delta value. - */ + */ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const __u32 old_x = hctx->ccid3hctx_x; + const __u64 old_x = hctx->ccid3hctx_x; if (hctx->ccid3hctx_p > 0) { - hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc, - hctx->ccid3hctx_x_recv * 2), - hctx->ccid3hctx_s / TFRC_T_MBI); - - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= - hctx->ccid3hctx_rtt) { - hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv, - hctx->ccid3hctx_x ) * 2, - usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt) ); + + hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, + hctx->ccid3hctx_x_recv * 2); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); + + } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - + (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + + hctx->ccid3hctx_x = + max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv), + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = *now; - } else - ccid3_pr_debug("Not changing X\n"); + } if (hctx->ccid3hctx_x != old_x) ccid3_update_send_time(hctx); } /* - * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) - * @len: DCCP packet payload size in bytes + * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) + * @len: DCCP packet payload size in bytes */ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) { @@ -178,6 +162,33 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) */ } +/* + * Update Window Counter using the algorithm from [RFC 4342, 8.1]. + * The algorithm is not applicable if RTT < 4 microseconds. + */ +static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, + struct timeval *now) +{ + suseconds_t delta; + u32 quarter_rtts; + + if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ + return; + + delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); + DCCP_BUG_ON(delta < 0); + + quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + + if (quarter_rtts > 0) { + hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ + + ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); + } +} + static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; @@ -191,20 +202,20 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto restart_timer; } - ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); - + switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_FBACK: /* RFC 3448, 4.4: Halve send rate directly */ - hctx->ccid3hctx_x = min_t(u32, hctx->ccid3hctx_x / 2, - hctx->ccid3hctx_s / TFRC_T_MBI); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); - ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " - "bytes/s\n", - dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u " + "bytes/s\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), - hctx->ccid3hctx_x); + (unsigned)(hctx->ccid3hctx_x >> 6)); /* The value of R is still undefined and so we can not recompute * the timout value. Keep initial value as per [RFC 4342, 5]. */ t_nfb = TFRC_INITIAL_TIMEOUT; @@ -213,34 +224,46 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) case TFRC_SSTATE_FBACK: /* * Check if IDLE since last timeout and recv rate is less than - * 4 packets per RTT + * 4 packets (in units of 64*bytes/sec) per RTT */ if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= - 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { + (hctx->ccid3hctx_x_recv >= 4 * + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt))) { struct timeval now; - ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + ccid3_pr_debug("%s(%p, state=%s), not idle\n", dccp_role(sk), sk, - ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* Halve sending rate */ + ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* If (p == 0 || X_calc > 2 * X_recv) + /* + * Modify the cached value of X_recv [RFC 3448, 4.4] + * + * If (p == 0 || X_calc > 2 * X_recv) * X_recv = max(X_recv / 2, s / (2 * t_mbi)); * Else * X_recv = X_calc / 4; + * + * Note that X_recv is scaled by 2^6 while X_calc is not */ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); if (hctx->ccid3hctx_p == 0 || - hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) - hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, - hctx->ccid3hctx_s / (2 * TFRC_T_MBI)); - else - hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; - - /* Update sending rate */ - dccp_timestamp(sk, &now); + (hctx->ccid3hctx_x_calc > + (hctx->ccid3hctx_x_recv >> 5))) { + + hctx->ccid3hctx_x_recv = + max(hctx->ccid3hctx_x_recv / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + (2 * TFRC_T_MBI)); + + if (hctx->ccid3hctx_p == 0) + dccp_timestamp(sk, &now); + } else { + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; + hctx->ccid3hctx_x_recv <<= 4; + } + /* Now recalculate X [RFC 3448, 4.3, step (4)] */ ccid3_hc_tx_update_x(sk, &now); } /* @@ -251,7 +274,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); break; case TFRC_SSTATE_NO_SENT: - DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk); /* fall through */ case TFRC_SSTATE_TERM: goto out; @@ -277,9 +300,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct dccp_tx_hist_entry *new_packet; struct timeval now; - long delay; + suseconds_t delay; BUG_ON(hctx == NULL); @@ -291,34 +313,21 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - /* See if last packet allocated was not sent */ - new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (new_packet == NULL || new_packet->dccphtx_sent) { - new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, - GFP_ATOMIC); - - if (unlikely(new_packet == NULL)) { - DCCP_WARN("%s, sk=%p, not enough mem to add to history," - "send refused\n", dccp_role(sk), sk); - return -ENOBUFS; - } - - dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); - } - dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + (jiffies + + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - /* Set initial sending rate to 1 packet per second */ + /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */ ccid3_hc_tx_update_s(hctx, skb->len); - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x <<= 6; /* First timeout, according to [RFC 3448, 4.2], is 1 second */ hctx->ccid3hctx_t_ipi = USEC_PER_SEC; @@ -332,77 +341,57 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) case TFRC_SSTATE_FBACK: delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); /* - * Scheduling of packet transmissions [RFC 3448, 4.6] + * Scheduling of packet transmissions [RFC 3448, 4.6] * * if (t_now > t_nom - delta) * // send the packet now * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (long)hctx->ccid3hctx_delta >= 0) + if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) return delay / 1000L; + + ccid3_hc_tx_update_win_count(hctx, &now); break; case TFRC_SSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return -EINVAL; } /* prepare to send now (add options etc.) */ dp->dccps_hc_tx_insert_options = 1; - new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = - hctx->ccid3hctx_last_win_count; + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + + /* set the nominal send time for the next following packet */ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); return 0; } -static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, + unsigned int len) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; - unsigned long quarter_rtt; struct dccp_tx_hist_entry *packet; BUG_ON(hctx == NULL); - dccp_timestamp(sk, &now); - ccid3_hc_tx_update_s(hctx, len); - packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); if (unlikely(packet == NULL)) { - DCCP_WARN("packet doesn't exist in history!\n"); - return; - } - if (unlikely(packet->dccphtx_sent)) { - DCCP_WARN("no unsent packet in history!\n"); + DCCP_CRIT("packet history - out of memory!"); return; } - packet->dccphtx_tstamp = now; - packet->dccphtx_seqno = dp->dccps_gss; - /* - * Check if win_count have changed - * Algorithm in "8.1. Window Counter Value" in RFC 4342. - */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); - if (likely(hctx->ccid3hctx_rtt > 8)) - quarter_rtt /= hctx->ccid3hctx_rtt / 4; - - if (quarter_rtt > 0) { - hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + - min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from " - "%u to %u!\n", - dccp_role(sk), sk, - packet->dccphtx_ccval, - hctx->ccid3hctx_last_win_count); - } + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - hctx->ccid3hctx_idle = 0; - packet->dccphtx_rtt = hctx->ccid3hctx_rtt; - packet->dccphtx_sent = 1; + dccp_timestamp(sk, &now); + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; + packet->dccphtx_sent = 1; + hctx->ccid3hctx_idle = 0; } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -414,7 +403,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct timeval now; unsigned long t_nfb; u32 pinv; - long r_sample, t_elapsed; + suseconds_t r_sample, t_elapsed; BUG_ON(hctx == NULL); @@ -430,44 +419,44 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_SSTATE_FBACK: /* get packet from history to look up t_recvdata */ packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " "in history!\n", dccp_role(sk), sk, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } - /* Update receive rate */ + /* Update receive rate in units of 64 * bytes/second */ hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; + hctx->ccid3hctx_x_recv <<= 6; /* Update loss event rate */ pinv = opt_recv->ccid3or_loss_event_rate; - if (pinv == ~0U || pinv == 0) + if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ hctx->ccid3hctx_p = 0; - else - hctx->ccid3hctx_p = 1000000 / pinv; + else /* can not exceed 100% */ + hctx->ccid3hctx_p = 1000000 / pinv; dccp_timestamp(sk, &now); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by - * R_sample = (now - t_recvdata) - t_elapsed + * R_sample = (now - t_recvdata) - t_elapsed */ r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; - if (unlikely(r_sample <= 0)) { - DCCP_WARN("WARNING: R_sample (%ld) <= 0!\n", r_sample); - r_sample = 0; - } else if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("WARNING: r_sample=%ldus <= t_elapsed=%ldus\n", - r_sample, t_elapsed); + DCCP_BUG_ON(r_sample < 0); + if (unlikely(r_sample <= t_elapsed)) + DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n", + (int)r_sample, (int)t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); - /* Update RTT estimate by + /* Update RTT estimate by * If (No feedback recv) * R = R_sample; * Else @@ -476,34 +465,45 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * q is a constant, RFC 3448 recomments 0.9 */ if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { - /* Use Larger Initial Windows [RFC 4342, sec. 5] - * We deviate in that we use `s' instead of `MSS'. */ - u16 w_init = max( 4 * hctx->ccid3hctx_s, - max(2 * hctx->ccid3hctx_s, 4380)); + /* + * Larger Initial Windows [RFC 4342, sec. 5] + * We deviate in that we use `s' instead of `MSS'. + */ + __u64 w_init = min(4 * hctx->ccid3hctx_s, + max(2 * hctx->ccid3hctx_s, 4380)); hctx->ccid3hctx_rtt = r_sample; - hctx->ccid3hctx_x = usecs_div(w_init, r_sample); + hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample); hctx->ccid3hctx_t_ld = now; ccid3_update_send_time(hctx); - ccid3_pr_debug("%s(%p), s=%u, w_init=%u, " - "R_sample=%ldus, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_s, w_init, r_sample, - hctx->ccid3hctx_x); + ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " + "R_sample=%dus, X=%u\n", dccp_role(sk), + sk, hctx->ccid3hctx_s, w_init, + (int)r_sample, + (unsigned)(hctx->ccid3hctx_x >> 6)); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); } else { hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + - (u32)r_sample ) / 10; - + (u32)r_sample) / 10; + + /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ + if (hctx->ccid3hctx_p > 0) + hctx->ccid3hctx_x_calc = + tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); ccid3_hc_tx_update_x(sk, &now); - ccid3_pr_debug("%s(%p), RTT=%uus (sample=%ldus), s=%u, " - "p=%u, X_calc=%u, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_rtt, r_sample, + ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, " + "p=%u, X_calc=%u, X_recv=%u, X=%u\n", + dccp_role(sk), + sk, hctx->ccid3hctx_rtt, (int)r_sample, hctx->ccid3hctx_s, hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, - hctx->ccid3hctx_x); + (unsigned)(hctx->ccid3hctx_x_recv >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6)); } /* unschedule no feedback timer */ @@ -513,57 +513,48 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); /* - * As we have calculated new ipi, delta, t_nom it is possible that - * we now can send a packet, so wake up dccp_wait_for_ccid + * As we have calculated new ipi, delta, t_nom it is possible + * that we now can send a packet, so wake up dccp_wait_for_ccid */ sk->sk_write_space(sk); /* * Update timeout interval for the nofeedback timer. * We use a configuration option to increase the lower bound. - * This can help avoid triggering the nofeedback timer too often - * ('spinning') on LANs with small RTTs. + * This can help avoid triggering the nofeedback timer too + * often ('spinning') on LANs with small RTTs. */ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, CONFIG_IP_DCCP_CCID3_RTO * - (USEC_PER_SEC/1000) ); + (USEC_PER_SEC/1000)); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) */ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); - - ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " + + ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, - usecs_to_jiffies(t_nfb), t_nfb); + dccp_role(sk), + sk, usecs_to_jiffies(t_nfb), t_nfb); - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); /* set idle flag */ - hctx->ccid3hctx_idle = 1; + hctx->ccid3hctx_idle = 1; break; case TFRC_SSTATE_NO_SENT: - if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT) - DCCP_WARN("Illegal ACK received - no packet sent\n"); + /* + * XXX when implementing bidirectional rx/tx check this again + */ + DCCP_WARN("Illegal ACK received - no packet sent\n"); /* fall through */ case TFRC_SSTATE_TERM: /* ignore feedback when closing */ break; } } -static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - - BUG_ON(hctx == NULL); - - if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN) - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; - return 0; -} - static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char *value) @@ -588,13 +579,14 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (unlikely(len != 4)) { - DCCP_WARN("%s, sk=%p, invalid len %d " + DCCP_WARN("%s(%p), invalid len %d " "for TFRC_OPT_LOSS_EVENT_RATE\n", dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + opt_recv->ccid3or_loss_event_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); } @@ -602,20 +594,21 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, case TFRC_OPT_LOSS_INTERVALS: opt_recv->ccid3or_loss_intervals_idx = idx; opt_recv->ccid3or_loss_intervals_len = len; - ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_intervals_idx, opt_recv->ccid3or_loss_intervals_len); break; case TFRC_OPT_RECEIVE_RATE: if (unlikely(len != 4)) { - DCCP_WARN("%s, sk=%p, invalid len %d " + DCCP_WARN("%s(%p), invalid len %d " "for TFRC_OPT_RECEIVE_RATE\n", dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + opt_recv->ccid3or_receive_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); } @@ -630,10 +623,12 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); hctx->ccid3hctx_s = 0; + hctx->ccid3hctx_rtt = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.function = + ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; init_timer(&hctx->ccid3hctx_no_feedback_timer); @@ -698,8 +693,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; struct timeval now; + suseconds_t delta; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); dccp_timestamp(sk, &now); @@ -707,21 +703,21 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; - case TFRC_RSTATE_DATA: { - const u32 delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); - hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, - delta); - } + case TFRC_RSTATE_DATA: + delta = timeval_delta(&now, + &hcrx->ccid3hcrx_tstamp_last_feedback); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_x_recv = + scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); break; case TFRC_RSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (unlikely(packet == NULL)) { - DCCP_WARN("%s, sk=%p, no data packet in history!\n", + DCCP_WARN("%s(%p), no data packet in history!\n", dccp_role(sk), sk); return; } @@ -730,13 +726,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; - /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = - timeval_delta(&now, &packet->dccphrx_tstamp) / 10; + /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ + delta = timeval_delta(&now, &packet->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_elapsed_time = delta / 10; + if (hcrx->ccid3hcrx_p == 0) - hcrx->ccid3hcrx_pinv = ~0; - else + hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ + else if (hcrx->ccid3hcrx_p > 1000000) { + DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p); + hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */ + } else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } @@ -764,9 +766,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_elapsed_time)) || dccp_insert_option_timestamp(sk, skb) || dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, - &pinv, sizeof(pinv)) || + &pinv, sizeof(pinv)) || dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, - &x_recv, sizeof(x_recv))) + &x_recv, sizeof(x_recv))) return -1; return 0; @@ -780,12 +782,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 rtt, delta, x_recv, fval, p, tmp2; + u32 x_recv, p; + suseconds_t rtt, delta; struct timeval tstamp = { 0, }; int interval = 0; int win_count = 0; int step = 0; - u64 tmp1; + u64 fval; list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, dccphrx_node) { @@ -810,13 +813,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (unlikely(step == 0)) { - DCCP_WARN("%s, sk=%p, packet history has no data packets!\n", + DCCP_WARN("%s(%p), packet history has no data packets!\n", dccp_role(sk), sk); return ~0; } if (unlikely(interval == 0)) { - DCCP_WARN("%s, sk=%p, Could not find a win_count interval > 0." + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." "Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } @@ -825,41 +828,51 @@ found: DCCP_CRIT("tail is null\n"); return ~0; } - rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; - ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", - dccp_role(sk), sk, rtt); - if (rtt == 0) { - DCCP_WARN("RTT==0, setting to 1\n"); - rtt = 1; + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + + rtt = delta * 4 / interval; + ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); + + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); + return ~0; } dccp_timestamp(sk, &tstamp); delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); - - if (x_recv == 0) - x_recv = hcrx->ccid3hcrx_x_recv; - - tmp1 = (u64)x_recv * (u64)rtt; - do_div(tmp1,10000000); - tmp2 = (u32)tmp1; - - if (!tmp2) { - DCCP_CRIT("tmp2 = 0, x_recv = %u, rtt =%u\n", x_recv, rtt); - return ~0; + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } } - fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; - /* do not alter order above or you will get overflow on 32 bit */ + fval = scaled_div(hcrx->ccid3hcrx_s, rtt); + fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); - ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " + + ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); if (p == 0) return ~0; else - return 1000000 / p; + return 1000000 / p; } static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) @@ -913,7 +926,8 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + struct dccp_rx_hist_entry *rx_hist = + dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); u64 seqno = packet->dccphrx_seqno; u64 tmp_seqno; int loss = 0; @@ -941,7 +955,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_inc_seqno(&tmp_seqno); while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, tmp_seqno, &ccval)) { - hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; hcrx->ccid3hcrx_ccval_nonloss = ccval; dccp_inc_seqno(&tmp_seqno); } @@ -967,7 +981,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u32 p_prev, rtt_prev, r_sample, t_elapsed; + u32 p_prev, rtt_prev; + suseconds_t r_sample, t_elapsed; int loss, payload_size; BUG_ON(hcrx == NULL); @@ -987,11 +1002,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample = timeval_usecs(&now); t_elapsed = opt_recv->dccpor_elapsed_time * 10; + DCCP_BUG_ON(r_sample < 0); if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("r_sample=%uus, t_elapsed=%uus\n", + DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n", r_sample, t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -1000,8 +1017,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample / 10; if (rtt_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", - dccp_role(sk), hcrx->ccid3hcrx_rtt, + ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; case DCCP_PKT_DATA: @@ -1013,7 +1030,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { - DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet " + DCCP_WARN("%s(%p), Not enough mem to add rx packet " "to history, consider it lost!\n", dccp_role(sk), sk); return; } @@ -1028,9 +1045,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " - "feedback\n", - dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " + "feedback\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); ccid3_hc_rx_send_feedback(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); @@ -1041,19 +1057,19 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; dccp_timestamp(sk, &now); - if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= - hcrx->ccid3hcrx_rtt) { + if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - + (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } return; case TFRC_RSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } /* Dealing with packet loss */ - ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", + ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); p_prev = hcrx->ccid3hcrx_p; @@ -1078,7 +1094,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + ccid3_pr_debug("entry\n"); hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); @@ -1086,7 +1102,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; hcrx->ccid3hcrx_s = 0; - hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ + hcrx->ccid3hcrx_rtt = 0; return 0; } @@ -1115,9 +1131,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) BUG_ON(hcrx == NULL); - info->tcpi_ca_state = hcrx->ccid3hcrx_state; - info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; + info->tcpi_ca_state = hcrx->ccid3hcrx_state; + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; } static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) @@ -1198,7 +1214,6 @@ static struct ccid_operations ccid3 = { .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, - .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), .ccid_hc_rx_init = ccid3_hc_rx_init, @@ -1210,7 +1225,7 @@ static struct ccid_operations ccid3 = { .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; - + #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, int, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); @@ -1233,7 +1248,7 @@ static __init int ccid3_module_init(void) goto out_free_tx; rc = ccid_register(&ccid3); - if (rc != 0) + if (rc != 0) goto out_free_loss_interval_history; out: return rc; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 07596d704ef9..15776a88c090 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -51,6 +51,16 @@ /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ #define TFRC_T_MBI 64 +/* What we think is a reasonable upper limit on RTT values */ +#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC)) + +#define CCID3_RTT_SANITY_CHECK(rtt) do { \ + if (rtt > CCID3_SANE_RTT_MAX) { \ + DCCP_CRIT("RTT (%d) too large, substituting %d", \ + (int)rtt, (int)CCID3_SANE_RTT_MAX); \ + rtt = CCID3_SANE_RTT_MAX; \ + } } while (0) + enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, TFRC_OPT_LOSS_INTERVALS = 193, @@ -67,7 +77,7 @@ struct ccid3_options_received { /* TFRC sender states */ enum ccid3_hc_tx_states { - TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_SENT = 1, TFRC_SSTATE_NO_FBACK, TFRC_SSTATE_FBACK, TFRC_SSTATE_TERM, @@ -75,23 +85,23 @@ enum ccid3_hc_tx_states { /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket * - * @ccid3hctx_x - Current sending rate - * @ccid3hctx_x_recv - Receive rate - * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1) + * @ccid3hctx_x - Current sending rate in 64 * bytes per second + * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second + * @ccid3hctx_x_calc - Calculated rate in bytes per second * @ccid3hctx_rtt - Estimate of current round trip time in usecs * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 - * @ccid3hctx_s - Packet size - * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1) - * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) + * @ccid3hctx_s - Packet size in bytes + * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs + * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states * @ccid3hctx_last_win_count - Last window counter sent * @ccid3hctx_t_last_win_count - Timestamp of earliest packet - * with last_win_count value sent + * with last_win_count value sent * @ccid3hctx_no_feedback_timer - Handle to no feedback timer * @ccid3hctx_idle - Flag indicating that sender is idling * @ccid3hctx_t_ld - Time last doubled during slow start * @ccid3hctx_t_nom - Nominal send time of next packet - * @ccid3hctx_delta - Send timer delta + * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs * @ccid3hctx_hist - Packet history * @ccid3hctx_options_received - Parsed set of retrieved options */ @@ -105,7 +115,7 @@ struct ccid3_hc_tx_sock { #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - enum ccid3_hc_tx_states ccid3hctx_state:8; + enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; struct timeval ccid3hctx_t_last_win_count; @@ -119,7 +129,7 @@ struct ccid3_hc_tx_sock { /* TFRC receiver states */ enum ccid3_hc_rx_states { - TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_NO_DATA = 1, TFRC_RSTATE_DATA, TFRC_RSTATE_TERM = 127, }; @@ -147,18 +157,18 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_nonloss:48, + u64 ccid3hcrx_seqno_nonloss:48, ccid3hcrx_ccval_nonloss:4, ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; - u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; - u16 ccid3hcrx_s; - u32 ccid3hcrx_pinv; - u32 ccid3hcrx_elapsed_time; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 ccid3hcrx_elapsed_time; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index b876c9c81c65..2e8ef42721e2 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -36,9 +36,100 @@ #include <linux/module.h> #include <linux/string.h> - #include "packet_history.h" +/* + * Transmitter History Routines + */ +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +/* + * Receiver History Routines + */ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) { struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); @@ -83,18 +174,24 @@ void dccp_rx_hist_delete(struct dccp_rx_hist *hist) EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); -void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) { - struct dccp_rx_hist_entry *entry, *next; + struct dccp_rx_hist_entry *packet = NULL, *entry; - list_for_each_entry_safe(entry, next, list, dccphrx_node) { - list_del_init(&entry->dccphrx_node); - kmem_cache_free(hist->dccprxh_slab, entry); - } -} + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } -EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + if (packet) + *ccval = packet->dccphrx_ccval; + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); struct dccp_rx_hist_entry * dccp_rx_hist_find_data_packet(const struct list_head *list) { @@ -184,110 +281,18 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -struct dccp_tx_hist *dccp_tx_hist_new(const char *name) -{ - struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_tx_hist_mask[] = "tx_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_tx_hist_mask, name); - hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccptxh_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_new); - -void dccp_tx_hist_delete(struct dccp_tx_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccptxh_slab); - - kmem_cache_destroy(hist->dccptxh_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); - -struct dccp_tx_hist_entry * - dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) -{ - struct dccp_tx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphtx_node) - if (entry->dccphtx_seqno == seq) { - packet = entry; - break; - } - - return packet; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); - -int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval) -{ - struct dccp_rx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphrx_node) - if (entry->dccphrx_seqno == seq) { - packet = entry; - break; - } - - if (packet) - *ccval = packet->dccphrx_ccval; - - return packet != NULL; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); - -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, - struct list_head *list, - struct dccp_tx_hist_entry *packet) +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) { - struct dccp_tx_hist_entry *next; + struct dccp_rx_hist_entry *entry, *next; - list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { - list_del_init(&packet->dccphtx_node); - dccp_tx_hist_entry_delete(hist, packet); + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); } } -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); - -void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) -{ - struct dccp_tx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccphtx_node) { - list_del_init(&entry->dccphtx_node); - dccp_tx_hist_entry_delete(hist, entry); - } -} +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 9a8bcf224aa7..1f960c19ea1b 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -49,43 +49,27 @@ #define TFRC_WIN_COUNT_PER_RTT 4 #define TFRC_WIN_COUNT_LIMIT 16 +/* + * Transmitter History data structures and declarations + */ struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, - dccphtx_ccval:4, dccphtx_sent:1; u32 dccphtx_rtt; struct timeval dccphtx_tstamp; }; -struct dccp_rx_hist_entry { - struct list_head dccphrx_node; - u64 dccphrx_seqno:48, - dccphrx_ccval:4, - dccphrx_type:4; - u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; -}; - struct dccp_tx_hist { struct kmem_cache *dccptxh_slab; }; extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); -extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); - -struct dccp_rx_hist { - struct kmem_cache *dccprxh_slab; -}; - -extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); -extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); -extern struct dccp_rx_hist_entry * - dccp_rx_hist_find_data_packet(const struct list_head *list); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const gfp_t prio) + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -96,18 +80,20 @@ static inline struct dccp_tx_hist_entry * return entry; } -static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, - struct dccp_tx_hist_entry *entry) +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) { - if (entry != NULL) - kmem_cache_free(hist->dccptxh_slab, entry); + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; } extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); -extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) @@ -115,30 +101,45 @@ static inline void dccp_tx_hist_add_entry(struct list_head *list, list_add(&entry->dccphtx_node, list); } +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *next); -extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, - struct list_head *list); +/* + * Receiver History data structures and declarations + */ +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_ccval:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; -static inline struct dccp_tx_hist_entry * - dccp_tx_hist_head(struct list_head *list) -{ - struct dccp_tx_hist_entry *head = NULL; +struct dccp_rx_hist { + struct kmem_cache *dccprxh_slab; +}; - if (!list_empty(list)) - head = list_entry(list->next, struct dccp_tx_hist_entry, - dccphtx_node); - return head; -} +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, - const u32 ndp, - const struct sk_buff *skb, - const gfp_t prio) + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, + const u32 ndp, + const struct sk_buff *skb, + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); @@ -156,18 +157,8 @@ static inline struct dccp_rx_hist_entry * return entry; } -static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, - struct dccp_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccprxh_slab, entry); -} - -extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, - struct list_head *list); - static inline struct dccp_rx_hist_entry * - dccp_rx_hist_head(struct list_head *list) + dccp_rx_hist_head(struct list_head *list) { struct dccp_rx_hist_entry *head = NULL; @@ -177,6 +168,27 @@ static inline struct dccp_rx_hist_entry * return head; } +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + static inline int dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) { @@ -184,12 +196,6 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, - struct list_head *rx_list, - struct list_head *li_list, - struct dccp_rx_hist_entry *packet, - u64 nonloss_seqno); - extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 45f30f59ea2a..faf5f7e219e3 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -13,8 +13,29 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - #include <linux/types.h> +#include <asm/div64.h> + +/* integer-arithmetic divisions of type (a * 1000000)/b */ +static inline u64 scaled_div(u64 a, u32 b) +{ + BUG_ON(b==0); + a *= 1000000; + do_div(a, b); + return a; +} + +static inline u32 scaled_div32(u64 a, u32 b) +{ + u64 result = scaled_div(a, b); + + if (result > UINT_MAX) { + DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", + (unsigned long long)a, b); + return UINT_MAX; + } + return result; +} extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index ddac2c511e2f..90009fd77e15 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -13,7 +13,6 @@ */ #include <linux/module.h> -#include <asm/div64.h> #include "../../dccp.h" #include "tfrc.h" @@ -616,15 +615,12 @@ static inline u32 tfrc_binsearch(u32 fval, u8 small) * @R: RTT scaled by 1000000 (i.e., microseconds) * @p: loss ratio estimate scaled by 1000000 * Returns X_calc in bytes per second (not scaled). - * - * Note: DO NOT alter this code unless you run test cases against it, - * as the code has been optimized to stop underflow/overflow. */ u32 tfrc_calc_x(u16 s, u32 R, u32 p) { - int index; + u16 index; u32 f; - u64 tmp1, tmp2; + u64 result; /* check against invalid parameters and divide-by-zero */ BUG_ON(p > 1000000); /* p must not exceed 100% */ @@ -650,15 +646,17 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) f = tfrc_calc_x_lookup[index][0]; } - /* The following computes X = s/(R*f(p)) in bytes per second. Since f(p) - * and R are both scaled by 1000000, we need to multiply by 1000000^2. - * ==> DO NOT alter this unless you test against overflow on 32 bit */ - tmp1 = ((u64)s * 100000000); - tmp2 = ((u64)R * (u64)f); - do_div(tmp2, 10000); - do_div(tmp1, tmp2); - - return (u32)tmp1; + /* + * Compute X = s/(R*f(p)) in bytes per second. + * Since f(p) and R are both scaled by 1000000, we need to multiply by + * 1000000^2. To avoid overflow, the result is computed in two stages. + * This works under almost all reasonable operational conditions, for a + * wide range of parameters. Yet, should some strange combination of + * parameters result in overflow, the use of scaled_div32 will catch + * this and return UINT_MAX - which is a logically adequate consequence. + */ + result = scaled_div(s, R); + return scaled_div32(result, f); } EXPORT_SYMBOL_GPL(tfrc_calc_x); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 68886986c8e4..a0900bf98e6b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -80,8 +80,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ -#define DCCP_XMIT_TIMEO 30000 /* Time/msecs for blocking transmit per packet */ - /* sysctl variables for DCCP */ extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; @@ -434,6 +432,7 @@ static inline void timeval_sub_usecs(struct timeval *tv, tv->tv_sec--; tv->tv_usec += USEC_PER_SEC; } + DCCP_BUG_ON(tv->tv_sec < 0); } #ifdef CONFIG_SYSCTL diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4dc487f27a1f..95b6927ec653 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -329,7 +329,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, switch (type) { case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; - default: DCCP_WARN("invalid type %d\n", type); return; + default: DCCP_WARN("invalid type %d\n", type); return; } opt->dccpop_feat = feature; @@ -427,7 +427,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, switch (type) { case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; - default: DCCP_WARN("invalid type %d\n", type); + default: DCCP_WARN("invalid type %d\n", type); return 1; } @@ -610,7 +610,7 @@ const char *dccp_feat_typename(const u8 type) case DCCPO_CHANGE_R: return("ChangeR"); case DCCPO_CONFIRM_R: return("ConfirmR"); /* the following case must not appear in feature negotation */ - default: dccp_pr_debug("unknown type %d [BUG!]\n", type); + default: dccp_pr_debug("unknown type %d [BUG!]\n", type); } return NULL; } diff --git a/net/dccp/input.c b/net/dccp/input.c index 7371a2f3acf4..565bc80557ce 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -1,6 +1,6 @@ /* * net/dccp/input.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -82,7 +82,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Otherwise, * Drop packet and return */ - if (dh->dccph_type == DCCP_PKT_SYNC || + if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && @@ -185,8 +185,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, dccp_rcv_close(sk, skb); return 0; case DCCP_PKT_REQUEST: - /* Step 7 - * or (S.is_server and P.type == Response) + /* Step 7 + * or (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state >= OPEN and P.type == Request * and P.seqno >= S.OSR) @@ -248,8 +248,18 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* + * Deliver to the CCID module in charge. + * FIXME: Currently DCCP operates one-directional only, i.e. a listening + * server is not at the same time a connecting client. There is + * not much sense in delivering to both rx/tx sides at the moment + * (only one is active at a time); when moving to bidirectional + * service, this needs to be revised. + */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); return __dccp_rcv_established(sk, skb, dh, len); discard: @@ -264,7 +274,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, const struct dccp_hdr *dh, const unsigned len) { - /* + /* * Step 4: Prepare sequence numbers in REQUEST * If S.state == REQUEST, * If (P.type == Response or P.type == Reset) @@ -332,7 +342,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer - * Continue with S.state == PARTOPEN + * Continue with S.state == PARTOPEN * / * Step 12 will send the Ack completing the * three-way handshake * / */ @@ -363,7 +373,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, */ __kfree_skb(skb); return 0; - } + } dccp_send_ack(sk); return -1; } @@ -371,7 +381,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - return 1; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -478,14 +488,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* XXX see the comments in dccp_rcv_established about this */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } /* @@ -567,7 +580,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - if (!queued) { + if (!queued) { discard: __kfree_skb(skb); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ff81679c9f17..90c74b4adb73 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -157,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the + * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) @@ -467,7 +467,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } - } + } }; security_skb_classify_flow(skb, &fl); @@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct inet_request_sock *ireq; struct request_sock *req; struct dccp_request_sock *dreq; - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -609,7 +609,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_bad_service_code(sk, service)) { reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; - } + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -644,7 +644,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = skb->nh.iph->saddr; ireq->opt = NULL; - /* + /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie @@ -846,15 +846,15 @@ static int dccp_v4_rcv(struct sk_buff *skb) } /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); - /* + /* * Step 2: - * If no socket ... + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -862,9 +862,9 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ @@ -876,8 +876,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { @@ -900,7 +900,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: - * If no socket ... + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c7aaa2574f52..6b91a9dd0411 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -77,7 +77,7 @@ static inline void dccp_v6_send_check(struct sock *sk, int unused_value, } static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) + __be16 sport, __be16 dport ) { return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); } @@ -329,7 +329,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); if (skb == NULL) - return; + return; skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); @@ -353,7 +353,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, - &rxskb->nh.ipv6h->daddr); + &rxskb->nh.ipv6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); @@ -424,7 +424,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; struct inet6_request_sock *ireq6; struct ipv6_pinfo *np = inet6_sk(sk); - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -437,7 +437,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_bad_service_code(sk, service)) { reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; - } + } /* * There are no SYN attacks on IPv6, yet... */ @@ -787,7 +787,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if (nsk != sk) { + if (nsk != sk) { if (dccp_child_process(sk, nsk, skb)) goto reset; if (opt_skb != NULL) @@ -843,14 +843,14 @@ static int dccp_v6_rcv(struct sk_buff **pskb) DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, dh->dccph_sport, &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); /* * Step 2: - * If no socket ... + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -860,7 +860,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ @@ -872,8 +872,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { @@ -893,7 +893,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: - * If no socket ... + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 4c9e26775f72..6656bb497c7b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -182,7 +182,7 @@ out_free: EXPORT_SYMBOL_GPL(dccp_create_openreq_child); -/* +/* * Process an incoming packet for RESPOND sockets represented * as an request_sock. */ diff --git a/net/dccp/options.c b/net/dccp/options.c index f398b43bc055..c03ba61eb6da 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -557,11 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) return -1; dp->dccps_hc_rx_insert_options = 0; } - if (dp->dccps_hc_tx_insert_options) { - if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb)) - return -1; - dp->dccps_hc_tx_insert_options = 0; - } /* Feature negotiation */ /* Data packets can't do feat negotiation */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 400c30b6fcae..824569659083 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -1,6 +1,6 @@ /* * net/dccp/output.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -175,14 +175,12 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for - * @timeo: for how long */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, - long *timeo) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - long delay; + unsigned long delay; int rc; while (1) { @@ -190,8 +188,6 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (sk->sk_err) goto do_error; - if (!*timeo) - goto do_nonblock; if (signal_pending(current)) goto do_interrupted; @@ -199,12 +195,9 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (rc <= 0) break; delay = msecs_to_jiffies(rc); - if (delay > *timeo || delay < 0) - goto do_nonblock; - sk->sk_write_pending++; release_sock(sk); - *timeo -= schedule_timeout(delay); + schedule_timeout(delay); lock_sock(sk); sk->sk_write_pending--; } @@ -215,11 +208,8 @@ out: do_error: rc = -EPIPE; goto out; -do_nonblock: - rc = -EAGAIN; - goto out; do_interrupted: - rc = sock_intr_errno(*timeo); + rc = -EINTR; goto out; } @@ -240,8 +230,6 @@ void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long timeo = DCCP_XMIT_TIMEO; /* If a packet is taking longer than - this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); @@ -251,11 +239,9 @@ void dccp_write_xmit(struct sock *sk, int block) sk_reset_timer(sk, &dp->dccps_xmit_timer, msecs_to_jiffies(err)+jiffies); break; - } else { - err = dccp_wait_for_ccid(sk, skb, &timeo); - timeo = DCCP_XMIT_TIMEO; - } - if (err) + } else + err = dccp_wait_for_ccid(sk, skb); + if (err && err != -EINTR) DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } @@ -281,8 +267,10 @@ void dccp_write_xmit(struct sock *sk, int block) if (err) DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", err); - } else + } else { + dccp_pr_debug("packet discarded\n"); kfree(skb); + } } } @@ -350,7 +338,6 @@ EXPORT_SYMBOL_GPL(dccp_make_response); static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, const enum dccp_reset_codes code) - { struct dccp_hdr *dh; struct dccp_sock *dp = dccp_sk(sk); @@ -431,14 +418,14 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: * SWL := max(GSR + 1 - floor(W/4), ISR), * AWL := max(GSS - W' + 1, ISS). * These adjustments MUST be applied only at the beginning of the * connection. - */ + */ dccp_update_gss(sk, dp->dccps_iss); dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 5ec47d9ee447..63b3fa20e14b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -196,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk, GFP_KERNEL); dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, sk, GFP_KERNEL); - if (unlikely(dp->dccps_hc_rx_ccid == NULL || + if (unlikely(dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL)) { ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); @@ -390,7 +390,7 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; - if (service == DCCP_SERVICE_INVALID_VALUE || + if (service == DCCP_SERVICE_INVALID_VALUE || optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) return -EINVAL; @@ -830,7 +830,7 @@ EXPORT_SYMBOL_GPL(inet_dccp_listen); static const unsigned char dccp_new_state[] = { /* current state: new state: action: */ [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_REQUESTING] = DCCP_CLOSED, [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_LISTEN] = DCCP_CLOSED, diff --git a/net/dccp/timer.c b/net/dccp/timer.c index e8f519e7f481..e5348f369c60 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -1,6 +1,6 @@ /* * net/dccp/timer.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -102,13 +102,13 @@ static void dccp_retransmit_timer(struct sock *sk) * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ + * -- Requests in client-REQUEST state (sec. 8.1.1) + * -- Acks in client-PARTOPEN state (sec. 8.1.5) + * -- CloseReq in server-CLOSEREQ state (sec. 8.3) + * -- Close in node-CLOSING state (sec. 8.3) */ BUG_TRAP(sk->sk_send_head != NULL); - /* + /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ @@ -200,7 +200,7 @@ static void dccp_keepalive_timer(unsigned long data) /* Only process if socket is not in use. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - /* Try again later. */ + /* Try again later. */ inet_csk_reset_keepalive_timer(sk, HZ / 20); goto out; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 74046efdf875..8ce00d3703da 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -565,7 +565,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } else { struct sk_buff *free_it = next; - /* Old fragmnet is completely overridden with + /* Old fragment is completely overridden with * new one drop it. */ next = next->next; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 91a075edd68e..7ea2d981a932 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -657,7 +657,7 @@ static void sync_master_loop(void) if (stop_master_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* clean up the sync_buff queue */ @@ -714,7 +714,7 @@ static void sync_backup_loop(void) if (stop_backup_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* release the sending multicast socket */ @@ -826,7 +826,7 @@ static int fork_sync_thread(void *startup) if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { IP_VS_ERR("could not create sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -849,10 +849,12 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) ip_vs_sync_state |= state; if (state == IP_VS_STATE_MASTER) { - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, sizeof(ip_vs_master_mcast_ifn)); + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, + sizeof(ip_vs_master_mcast_ifn)); ip_vs_master_syncid = syncid; } else { - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, sizeof(ip_vs_backup_mcast_ifn)); + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, + sizeof(ip_vs_backup_mcast_ifn)); ip_vs_backup_syncid = syncid; } @@ -860,7 +862,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { IP_VS_ERR("could not create fork_sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -880,7 +882,8 @@ int stop_sync_thread(int state) IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_INFO("stopping sync thread %d ...\n", - (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); + (state == IP_VS_STATE_MASTER) ? + sync_master_pid : sync_backup_pid); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&stop_sync_wait, &wait); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 363df9976c9d..f6026d4ac428 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -401,7 +401,7 @@ config IP_NF_NAT # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK + depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 help The Full NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 71b76ade00e1..9aa22398b3dc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -358,6 +358,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct arpt_standard_target *t = (void *)arpt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { printk("arptables: loop hook %u pos %u %08X.\n", @@ -368,11 +369,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct arpt_entry) + if ((e->target_offset == sizeof(struct arpt_entry) && (strcmp(t->target.u.user.name, ARPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->arp)) { + && unconditional(&e->arp)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0ff2956d35e5..09696f16aa95 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -384,6 +384,7 @@ mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct ipt_standard_target *t = (void *)ipt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", @@ -394,11 +395,11 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ipt_entry) + if ((e->target_offset == sizeof(struct ipt_entry) && (strcmp(t->target.u.user.name, IPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ip)) { + && unconditional(&e->ip)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { @@ -484,7 +485,47 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) } static inline int -check_match(struct ipt_entry_match *m, +check_entry(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + + if (!ip_checkentry(&e->ip)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) + return -EINVAL; + + t = ipt_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static inline int check_match(struct ipt_entry_match *m, const char *name, + const struct ipt_ip *ip, unsigned int hookmask) +{ + struct ipt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), + name, hookmask, ip->proto, + ip->invflags & IPT_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ip, match, m->data, + hookmask)) { + duprintf("ip_tables: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; + } + return ret; +} + +static inline int +find_check_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, @@ -497,26 +538,15 @@ check_match(struct ipt_entry_match *m, m->u.user.revision), "ipt_%s", m->u.user.name); if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("find_check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); + ret = check_match(m, name, ip, hookmask); if (ret) goto err; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -524,10 +554,29 @@ err: return ret; } -static struct ipt_target ipt_standard_target; +static inline int check_target(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + struct ipt_target *target; + int ret; + + t = ipt_get_target(e); + target = t->u.kernel.target; + ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), + name, e->comefrom, e->ip.proto, + e->ip.invflags & IPT_INV_PROTO); + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, + t->data, e->comefrom)) { + duprintf("ip_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + ret = -EINVAL; + } + return ret; +} static inline int -check_entry(struct ipt_entry *e, const char *name, unsigned int size, +find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ipt_entry_target *t; @@ -535,49 +584,32 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, int ret; unsigned int j; - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) - return -EINVAL; + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); + ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, + e->comefrom, &j); if (ret != 0) goto cleanup_matches; t = ipt_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET, t->u.user.name, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); + ret = check_target(e, name); if (ret) goto err; - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -712,7 +744,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + find_check_entry, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, @@ -1452,14 +1484,9 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct compat_xt_entry_target) > - e->next_offset) - return -EINVAL; + ret = check_entry(e, name); + if (ret) + return ret; off = 0; entry_offset = (void *)e - (void *)base; @@ -1470,15 +1497,13 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, goto cleanup_matches; t = ipt_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET, t->u.user.name, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } @@ -1555,57 +1580,15 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, return ret; } -static inline int compat_check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) -{ - struct ipt_match *match; - int ret; - - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { - duprintf("ip_tables: compat: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - } - return ret; -} - -static inline int compat_check_target(struct ipt_entry *e, const char *name) -{ - struct ipt_entry_target *t; - struct ipt_target *target; - int ret; - - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, - t->data, e->comefrom)) { - duprintf("ip_tables: compat: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - } - return ret; -} - static inline int compat_check_entry(struct ipt_entry *e, const char *name) { int ret; - ret = IPT_MATCH_ITERATE(e, compat_check_match, name, &e->ip, - e->comefrom); + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom); if (ret) return ret; - return compat_check_target(e, name); + return check_target(e, name); } static int diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fef56ae61abe..b1c11160b9de 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -447,6 +447,12 @@ checkentry(const char *tablename, cipinfo->config = config; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } + return 1; } @@ -460,6 +466,8 @@ static void destroy(const struct xt_target *target, void *targinfo) clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(target->family); } static struct ipt_target clusterip_tgt = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1aaff0a2e098..2daa0dc19d33 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1325,7 +1325,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (time_after(jiffies, + if (rt->u.dst.rate_tokens == 0 || + time_after(jiffies, (rt->u.dst.rate_last + (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 090c690627e5..b67e0dd743be 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2364,8 +2364,9 @@ struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) EXPORT_SYMBOL(__tcp_get_md5sig_pool); -void __tcp_put_md5sig_pool(void) { - __tcp_free_md5sig_pool(tcp_md5sig_pool); +void __tcp_put_md5sig_pool(void) +{ + tcp_free_md5sig_pool(); } EXPORT_SYMBOL(__tcp_put_md5sig_pool); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a1222d6968c4..bf7a22412bcb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -928,6 +928,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) if (tp->md5sig_info->entries4 == 0) { kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; + tp->md5sig_info->alloced4 = 0; } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memcpy(&tp->md5sig_info->keys4[i], @@ -1185,7 +1186,7 @@ done_opts: return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " + LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1eafcfc95e81..352690e2ab82 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -978,12 +978,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_UNICAST_HOPS: - val = np->hop_limit; - break; - case IPV6_MULTICAST_HOPS: - val = np->mcast_hops; + { + struct dst_entry *dst; + + if (optname == IPV6_UNICAST_HOPS) + val = np->hop_limit; + else + val = np->mcast_hops; + + dst = sk_dst_get(sk); + if (dst) { + if (val < 0) + val = dst_metric(dst, RTAX_HOPLIMIT); + if (val < 0) + val = ipv6_get_hoplimit(dst->dev); + dst_release(dst); + } + if (val < 0) + val = ipv6_devconf.hop_limit; break; + } case IPV6_MULTICAST_LOOP: val = np->mc_loop; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index fc3e5eb4bc3f..adcd6131df2a 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -7,7 +7,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" config NF_CONNTRACK_IPV6 tristate "IPv6 connection tracking support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -21,6 +21,7 @@ config NF_CONNTRACK_IPV6 config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" + depends on INET && IPV6 && NETFILTER && EXPERIMENTAL ---help--- This option adds a queue handler to the kernel for IPv6 @@ -41,7 +42,7 @@ config IP6_NF_QUEUE config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" - depends on NETFILTER_XTABLES + depends on INET && IPV6 && EXPERIMENTAL && NETFILTER_XTABLES help ip6tables is a general, extensible packet identification framework. Currently only the packet filtering and packet mangling subsystem diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4eec4b3988b8..99502c5da4c4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -413,6 +413,7 @@ mark_source_chains(struct xt_table_info *newinfo, unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); + int visited = e->comefrom & (1 << hook); if (!(valid_hooks & (1 << hook))) continue; @@ -433,11 +434,11 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ip6t_entry) + if ((e->target_offset == sizeof(struct ip6t_entry) && (strcmp(t->target.u.user.name, IP6T_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ipv6)) { + && unconditional(&e->ipv6)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9f80518aacbd..8c3d56871b50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -494,7 +494,7 @@ do { \ goto out; \ pn = fn->parent; \ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(pn->subtree, NULL, saddr); \ + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ else \ fn = pn; \ if (fn->fn_flags & RTN_RTINFO) \ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3a66878a1829..1b853c34d301 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,5 +1,5 @@ menu "Core Netfilter Configuration" - depends on NET && NETFILTER + depends on NET && INET && NETFILTER config NETFILTER_NETLINK tristate "Netfilter netlink interface" diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index b5548239d412..0534bfa65cce 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -96,6 +96,11 @@ checkentry(const char *tablename, { struct xt_connmark_target_info *matchinfo = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " @@ -111,6 +116,12 @@ checkentry(const char *tablename, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + #ifdef CONFIG_COMPAT struct compat_xt_connmark_target_info { compat_ulong_t mark, mask; @@ -147,6 +158,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), #ifdef CONFIG_COMPAT @@ -160,6 +172,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), .me = THIS_MODULE @@ -168,7 +181,6 @@ static struct xt_target xt_connmark_target[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connmark_target, ARRAY_SIZE(xt_connmark_target)); } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 467386266674..a3fe3c334b09 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -93,6 +93,11 @@ static int checkentry(const char *tablename, const void *entry, { struct xt_connsecmark_target_info *info = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -106,11 +111,18 @@ static int checkentry(const char *tablename, const void *entry, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + static struct xt_target xt_connsecmark_target[] = { { .name = "CONNSECMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -120,6 +132,7 @@ static struct xt_target xt_connsecmark_target[] = { .name = "CONNSECMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -129,7 +142,6 @@ static struct xt_target xt_connsecmark_target[] = { static int __init xt_connsecmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connsecmark_target, ARRAY_SIZE(xt_connsecmark_target)); } diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index dcc497ea8183..d93cb096a675 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -139,15 +139,28 @@ static int check(const char *tablename, sinfo->direction != XT_CONNBYTES_DIR_BOTH) return 0; + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", match->family); + return 0; + } + return 1; } +static void +destroy(const struct xt_match *match, void *matchinfo) +{ + nf_ct_l3proto_module_put(match->family); +} + static struct xt_match xt_connbytes_match[] = { { .name = "connbytes", .family = AF_INET, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, @@ -156,6 +169,7 @@ static struct xt_match xt_connbytes_match[] = { .family = AF_INET6, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index a8f03057dbde..36c2defff238 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -63,22 +63,18 @@ checkentry(const char *tablename, printk(KERN_WARNING "connmark: only support 32bit mark\n"); return 0; } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } #ifdef CONFIG_COMPAT @@ -140,7 +136,6 @@ static struct xt_match xt_connmark_match[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0ea501a2fda5..3dc2357b8de8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -20,6 +20,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_conntrack.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); @@ -228,21 +229,17 @@ checkentry(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match conntrack_match = { @@ -257,7 +254,6 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { - need_conntrack(); return xt_register_match(&conntrack_match); } diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 5d7818b73e3a..04bc32ba7195 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,6 +24,7 @@ #endif #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_helper.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); @@ -143,13 +144,11 @@ static int check(const char *tablename, { struct xt_helper_info *info = matchinfo; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif info->name[29] = '\0'; return 1; } @@ -157,9 +156,7 @@ static int check(const char *tablename, static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_helper_match[] = { @@ -185,7 +182,6 @@ static struct xt_match xt_helper_match[] = { static int __init xt_helper_init(void) { - need_conntrack(); return xt_register_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index fd8f954cded5..b9b3ffc5451d 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -113,20 +113,16 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return 0; - if (brnf_deferred_hooks == 0 && - info->bitmask & XT_PHYSDEV_OP_OUT && + if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is deprecated and breaks other things, it will " - "be removed in January 2007. See Documentation/" - "feature-removal-schedule.txt for details. This doesn't " - "affect you in case you're using it for purely bridged " - "traffic.\n"); - brnf_deferred_hooks = 1; + "traffic is not supported anymore.\n"); + if (hook_mask & (1 << NF_IP_LOCAL_OUT)) + return 0; } return 1; } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index d9010b16a1f9..df37b912163a 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -50,22 +50,18 @@ static int check(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_state_match[] = { @@ -91,7 +87,6 @@ static struct xt_match xt_state_match[] = { static int __init xt_state_init(void) { - need_conntrack(); return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d50f801f181..43bbe2c9e49a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1377,6 +1377,15 @@ static struct notifier_block nr_dev_notifier = { static struct net_device **dev_nr; +static struct ax25_protocol nr_pid = { + .pid = AX25_P_NETROM, + .func = nr_route_frame +}; + +static struct ax25_linkfail nr_linkfail_notifier = { + .func = nr_link_failed, +}; + static int __init nr_proto_init(void) { int i; @@ -1424,8 +1433,8 @@ static int __init nr_proto_init(void) register_netdevice_notifier(&nr_dev_notifier); - ax25_protocol_register(AX25_P_NETROM, nr_route_frame); - ax25_linkfail_register(nr_link_failed); + ax25_register_pid(&nr_pid); + ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL nr_register_sysctl(); @@ -1474,7 +1483,7 @@ static void __exit nr_exit(void) nr_unregister_sysctl(); #endif - ax25_linkfail_release(nr_link_failed); + ax25_linkfail_release(&nr_linkfail_notifier); ax25_protocol_release(AX25_P_NETROM); unregister_netdevice_notifier(&nr_dev_notifier); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 9b8eb54971ab..4700d5225b78 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -128,25 +128,37 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short return -37; } -static int nr_set_mac_address(struct net_device *dev, void *addr) +static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; + int err; + + if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) + return 0; + + if (dev->flags & IFF_UP) { + err = ax25_listen_register((ax25_address *)sa->sa_data, NULL); + if (err) + return err; - if (dev->flags & IFF_UP) ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + } memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); - if (dev->flags & IFF_UP) - ax25_listen_register((ax25_address *)dev->dev_addr, NULL); - return 0; } static int nr_open(struct net_device *dev) { + int err; + + err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + if (err) + return err; + netif_start_queue(dev); - ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + return 0; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0096105bcd47..8f88964099ef 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -87,8 +87,9 @@ static void nr_remove_neigh(struct nr_neigh *); * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ -static int nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax25, - ax25_digi *ax25_digi, struct net_device *dev, int quality, int obs_count) +static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, + ax25_address *ax25, ax25_digi *ax25_digi, struct net_device *dev, + int quality, int obs_count) { struct nr_node *nr_node; struct nr_neigh *nr_neigh; @@ -406,7 +407,8 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n /* * Lock a neighbour with a quality. */ -static int nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) +static int __must_check nr_add_neigh(ax25_address *callsign, + ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) { struct nr_neigh *nr_neigh; @@ -777,9 +779,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) nr_src = (ax25_address *)(skb->data + 0); nr_dest = (ax25_address *)(skb->data + 7); - if (ax25 != NULL) - nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, - ax25->ax25_dev->dev, 0, sysctl_netrom_obsolescence_count_initialiser); + if (ax25 != NULL) { + ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, + ax25->ax25_dev->dev, 0, + sysctl_netrom_obsolescence_count_initialiser); + if (ret) + return ret; + } if ((dev = nr_dev_get(nr_dest)) != NULL) { /* Its for me */ if (ax25 == NULL) /* Its from me */ @@ -844,6 +850,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) ret = (nr_neigh->ax25 != NULL); nr_node_unlock(nr_node); nr_node_put(nr_node); + return ret; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 08a542855654..9e279464c9d1 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1314,7 +1314,8 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address))) return -EFAULT; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) - ax25_listen_register(&rose_callsign, NULL); + return ax25_listen_register(&rose_callsign, NULL); + return 0; case SIOCRSGL2CALL: @@ -1481,6 +1482,15 @@ static struct notifier_block rose_dev_notifier = { static struct net_device **dev_rose; +static struct ax25_protocol rose_pid = { + .pid = AX25_P_ROSE, + .func = rose_route_frame +}; + +static struct ax25_linkfail rose_linkfail_notifier = { + .func = rose_link_failed +}; + static int __init rose_proto_init(void) { int i; @@ -1530,8 +1540,8 @@ static int __init rose_proto_init(void) sock_register(&rose_family_ops); register_netdevice_notifier(&rose_dev_notifier); - ax25_protocol_register(AX25_P_ROSE, rose_route_frame); - ax25_linkfail_register(rose_link_failed); + ax25_register_pid(&rose_pid); + ax25_linkfail_register(&rose_linkfail_notifier); #ifdef CONFIG_SYSCTL rose_register_sysctl(); @@ -1579,7 +1589,7 @@ static void __exit rose_exit(void) rose_rt_free(); ax25_protocol_release(AX25_P_ROSE); - ax25_linkfail_release(rose_link_failed); + ax25_linkfail_release(&rose_linkfail_notifier); if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_release(&rose_callsign, NULL); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 7c279e2659ec..50824d345fa6 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -93,20 +93,34 @@ static int rose_rebuild_header(struct sk_buff *skb) static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; + int err; - rose_del_loopback_node((rose_address *)dev->dev_addr); + if (!memcpy(dev->dev_addr, sa->sa_data, dev->addr_len)) + return 0; - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + if (dev->flags & IFF_UP) { + err = rose_add_loopback_node((rose_address *)dev->dev_addr); + if (err) + return err; + + rose_del_loopback_node((rose_address *)dev->dev_addr); + } - rose_add_loopback_node((rose_address *)dev->dev_addr); + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); return 0; } static int rose_open(struct net_device *dev) { + int err; + + err = rose_add_loopback_node((rose_address *)dev->dev_addr); + if (err) + return err; + netif_start_queue(dev); - rose_add_loopback_node((rose_address *)dev->dev_addr); + return 0; } diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 103b4d38f88a..3e41bd93ab9f 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -79,7 +79,8 @@ static void rose_loopback_timer(unsigned long param) skb->h.raw = skb->data; - if ((sk = rose_find_socket(lci_o, rose_loopback_neigh)) != NULL) { + sk = rose_find_socket(lci_o, &rose_loopback_neigh); + if (sk) { if (rose_process_rx_frame(sk, skb) == 0) kfree_skb(skb); continue; @@ -87,7 +88,7 @@ static void rose_loopback_timer(unsigned long param) if (frametype == ROSE_CALL_REQUEST) { if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) + if (rose_rx_call_request(skb, dev, &rose_loopback_neigh, lci_o) == 0) kfree_skb(skb); } else { kfree_skb(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 7252344779a0..8028c0d425dc 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -46,13 +46,13 @@ static DEFINE_SPINLOCK(rose_neigh_list_lock); static struct rose_route *rose_route_list; static DEFINE_SPINLOCK(rose_route_list_lock); -struct rose_neigh *rose_loopback_neigh; +struct rose_neigh rose_loopback_neigh; /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ -static int rose_add_node(struct rose_route_struct *rose_route, +static int __must_check rose_add_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; @@ -361,33 +361,30 @@ out: /* * Add the loopback neighbour. */ -int rose_add_loopback_neigh(void) +void rose_add_loopback_neigh(void) { - if ((rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_ATOMIC)) == NULL) - return -ENOMEM; + struct rose_neigh *sn = &rose_loopback_neigh; - rose_loopback_neigh->callsign = null_ax25_address; - rose_loopback_neigh->digipeat = NULL; - rose_loopback_neigh->ax25 = NULL; - rose_loopback_neigh->dev = NULL; - rose_loopback_neigh->count = 0; - rose_loopback_neigh->use = 0; - rose_loopback_neigh->dce_mode = 1; - rose_loopback_neigh->loopback = 1; - rose_loopback_neigh->number = rose_neigh_no++; - rose_loopback_neigh->restarted = 1; + sn->callsign = null_ax25_address; + sn->digipeat = NULL; + sn->ax25 = NULL; + sn->dev = NULL; + sn->count = 0; + sn->use = 0; + sn->dce_mode = 1; + sn->loopback = 1; + sn->number = rose_neigh_no++; + sn->restarted = 1; - skb_queue_head_init(&rose_loopback_neigh->queue); + skb_queue_head_init(&sn->queue); - init_timer(&rose_loopback_neigh->ftimer); - init_timer(&rose_loopback_neigh->t0timer); + init_timer(&sn->ftimer); + init_timer(&sn->t0timer); spin_lock_bh(&rose_neigh_list_lock); - rose_loopback_neigh->next = rose_neigh_list; - rose_neigh_list = rose_loopback_neigh; + sn->next = rose_neigh_list; + rose_neigh_list = sn; spin_unlock_bh(&rose_neigh_list_lock); - - return 0; } /* @@ -421,13 +418,13 @@ int rose_add_loopback_node(rose_address *address) rose_node->mask = 10; rose_node->count = 1; rose_node->loopback = 1; - rose_node->neighbour[0] = rose_loopback_neigh; + rose_node->neighbour[0] = &rose_loopback_neigh; /* Insert at the head of list. Address is always mask=10 */ rose_node->next = rose_node_list; rose_node_list = rose_node; - rose_loopback_neigh->count++; + rose_loopback_neigh.count++; out: spin_unlock_bh(&rose_node_list_lock); @@ -458,7 +455,7 @@ void rose_del_loopback_node(rose_address *address) rose_remove_node(rose_node); - rose_loopback_neigh->count--; + rose_loopback_neigh.count--; out: spin_unlock_bh(&rose_node_list_lock); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ad0057db0f91..5db95caed0a3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -298,6 +298,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_flags = sp->default_flags; asoc->default_context = sp->default_context; asoc->default_timetolive = sp->default_timetolive; + asoc->default_rcv_context = sp->default_rcv_context; return asoc; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3c3e560087ca..d8d36dee5ab6 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,8 +78,44 @@ #include <asm/uaccess.h> +/* Event handler for inet6 address addition/deletion events. */ +int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, + void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; + + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v6.sin6_family = AF_INET6; + addr->a.v6.sin6_port = 0; + memcpy(&addr->a.v6.sin6_addr, &ifa->addr, + sizeof(struct in6_addr)); + addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } + + return NOTIFY_DONE; +} + static struct notifier_block sctp_inet6addr_notifier = { - .notifier_call = sctp_inetaddr_event, + .notifier_call = sctp_inet6addr_event, }; /* ICMP error handler. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f2ba8615895b..3a3db56729ce 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -163,7 +163,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void __sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(void) { struct net_device *dev; struct list_head *pos; @@ -179,17 +179,8 @@ static void __sctp_get_local_addr_list(void) read_unlock(&dev_base_lock); } -static void sctp_get_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Free the existing local addresses. */ -static void __sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(void) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; @@ -201,27 +192,15 @@ static void __sctp_free_local_addr_list(void) } } -/* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos; - unsigned long flags; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if @@ -242,7 +221,6 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return error; } @@ -622,18 +600,36 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. - * Basically, whenever there is an event, we re-build our local address list. - */ +/* Event handler for inet address addition/deletion events. */ int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { - unsigned long flags; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v4.sin_family = AF_INET; + addr->a.v4.sin_port = 0; + addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } return NOTIFY_DONE; } @@ -1172,13 +1168,12 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); + + sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_get_local_addr_list(); - __unsafe(THIS_MODULE); status = 0; out: @@ -1263,6 +1258,7 @@ module_exit(sctp_exit); * __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly. */ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); MODULE_LICENSE("GPL"); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e8132b8c4d9..bdd8bd428b64 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2746,6 +2746,46 @@ static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval, return 0; } +/* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * + * The context field in the sctp_sndrcvinfo structure is normally only + * used when a failed message is retrieved holding the value that was + * sent down on the actual send call. This option allows the setting of + * a default context on an association basis that will be received on + * reading messages from the peer. This is especially helpful in the + * one-2-many model for an application to keep some reference to an + * internal state machine that is processing messages on the + * association. Note that the setting of this value only effects + * received messages from the peer and does not effect the value that is + * saved with outbound messages. + */ +static int sctp_setsockopt_context(struct sock *sk, char __user *optval, + int optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_assoc_value)) + return -EINVAL; + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + asoc->default_rcv_context = params.assoc_value; + } else { + sp->default_rcv_context = params.assoc_value; + } + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -2857,6 +2897,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ADAPTION_LAYER: retval = sctp_setsockopt_adaption_layer(sk, optval, optlen); break; + case SCTP_CONTEXT: + retval = sctp_setsockopt_context(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; @@ -3016,6 +3059,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_context = 0; sp->default_timetolive = 0; + sp->default_rcv_context = 0; + /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. @@ -3821,10 +3866,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; + struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; rwlock_t *addr_lock; - unsigned long flags; int cnt = 0; if (len != sizeof(sctp_assoc_t)) @@ -3859,8 +3903,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); @@ -3869,8 +3912,6 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, continue; cnt++; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); } else { cnt = 1; } @@ -3892,15 +3933,13 @@ done: static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, void __user *to) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3909,16 +3948,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(to, &temp, addrlen)) return -EFAULT; - } + to += addrlen; cnt ++; if (cnt >= max_addrs) break; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -3926,15 +3962,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, void __user **to, size_t space_left) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3945,16 +3979,13 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if(space_left<addrlen) return -ENOMEM; - if (copy_to_user(*to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(*to, &temp, addrlen)) return -EFAULT; - } + *to += addrlen; cnt ++; space_left -= addrlen; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -4435,6 +4466,42 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len, } /* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * (chapter and verse is quoted at sctp_setsockopt_context()) + */ +static int sctp_getsockopt_context(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (len != sizeof(struct sctp_assoc_value)) + return -EINVAL; + + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + params.assoc_value = asoc->default_rcv_context; + } else { + params.assoc_value = sp->default_rcv_context; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + + return 0; +} + +/* * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG) * * This socket option specifies the maximum size to put in any outgoing @@ -4572,6 +4639,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_adaption_layer(sk, len, optval, optlen); break; + case SCTP_CONTEXT: + retval = sctp_getsockopt_context(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e255a709f1b7..93ac63b055ba 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -849,8 +849,10 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); + /* context value that is set via SCTP_CONTEXT socket option. */ + sinfo.sinfo_context = event->asoc->default_rcv_context; + /* These fields are not used while receiving. */ - sinfo.sinfo_context = 0; sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 700353b330fd..066c64a97fd8 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -804,19 +804,19 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) - goto out; + return stat; if (integ_len > buf->len) - goto out; + return stat; if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) BUG(); /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) BUG(); if (mic.len > RPC_MAX_AUTH_SIZE) - goto out; + return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); if (!mic.data) - goto out; + return stat; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) goto out; maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); @@ -826,6 +826,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; stat = 0; out: + kfree(mic.data); return stat; } @@ -1065,7 +1066,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { case -EAGAIN: - goto drop; + case -ETIMEDOUT: case -ENOENT: goto drop; case 0: diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80aff0474572..14274490f92e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static int cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -185,6 +185,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); * * Returns 0 if the cache_head can be used, or cache_puts it and returns * -EAGAIN if upcall is pending, + * -ETIMEDOUT if upcall failed and should be retried, * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, @@ -236,7 +237,8 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) - cache_defer_req(rqstp, h); + if (cache_defer_req(rqstp, h) != 0) + rv = -ETIMEDOUT; if (rv) cache_put(h, detail); @@ -523,14 +525,21 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; int hash = DFR_HASH(item); + if (cache_defer_cnt >= DFR_MAX) { + /* too much in the cache, randomly drop this one, + * or continue and drop the oldest below + */ + if (net_random()&1) + return -ETIMEDOUT; + } dreq = req->defer(req); if (dreq == NULL) - return; + return -ETIMEDOUT; dreq->item = item; dreq->recv_time = get_seconds(); @@ -546,17 +555,8 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* it is in, now maybe clean up */ dreq = NULL; if (++cache_defer_cnt > DFR_MAX) { - /* too much in the cache, randomly drop - * first or last - */ - if (net_random()&1) - dreq = list_entry(cache_defer_list.next, - struct cache_deferred_req, - recent); - else - dreq = list_entry(cache_defer_list.prev, - struct cache_deferred_req, - recent); + dreq = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); list_del(&dreq->recent); list_del(&dreq->hash); cache_defer_cnt--; @@ -571,6 +571,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* must have just been validated... */ cache_revisit_request(item); } + return 0; } static void cache_revisit_request(struct cache_head *item) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index eb44ec929ca1..f3001f3626f6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -308,7 +308,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_nrpools = npools; serv->sv_pools = - kcalloc(sizeof(struct svc_pool), serv->sv_nrpools, + kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), GFP_KERNEL); if (!serv->sv_pools) { kfree(serv); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a0a953a430c2..0d1e8fb83b93 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -53,6 +53,10 @@ struct auth_domain *unix_domain_find(char *name) return NULL; kref_init(&new->h.ref); new->h.name = kstrdup(name, GFP_KERNEL); + if (new->h.name == NULL) { + kfree(new); + return NULL; + } new->h.flavour = &svcauth_unix; new->addr_changes = 0; rv = auth_domain_lookup(name, &new->h); @@ -435,6 +439,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) default: BUG(); case -EAGAIN: + case -ETIMEDOUT: return SVC_DROP; case -ENOENT: return SVC_DENIED; diff --git a/net/tipc/config.c b/net/tipc/config.c index 458a2c46cef3..baf55c459c8b 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -208,7 +208,7 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg, if (mng.link_subscriptions > 64) break; - sub = (struct subscr_data *)kmalloc(sizeof(*sub), + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (sub == NULL) { warn("Memory squeeze; dropped remote link subscription\n"); |