diff options
Diffstat (limited to 'net')
164 files changed, 4854 insertions, 2472 deletions
diff --git a/net/802/p8022.c b/net/802/p8022.c index b24817c63ca8..2530f35241cd 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -56,7 +56,7 @@ struct datalink_proto *register_8022_client(unsigned char type, void unregister_8022_client(struct datalink_proto *proto) { - llc_sap_close(proto->sap); + llc_sap_put(proto->sap); kfree(proto); } diff --git a/net/802/psnap.c b/net/802/psnap.c index ab80b1fab53c..4d638944d933 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -106,7 +106,7 @@ module_init(snap_init); static void __exit snap_exit(void) { - llc_sap_close(snap_sap); + llc_sap_put(snap_sap); } module_exit(snap_exit); diff --git a/net/802/tr.c b/net/802/tr.c index 1bb7dc1b85cd..1eaa3d19d8bf 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -238,7 +238,7 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) return trllc->ethertype; } - return ntohs(ETH_P_802_2); + return ntohs(ETH_P_TR_802_2); } /* diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96cf..b74864889670 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; - unsigned short proto; + __be16 proto; /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); diff --git a/net/Kconfig b/net/Kconfig index 2bdd5623fdd5..60f6f321bd76 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,6 +140,7 @@ config BRIDGE_NETFILTER If unsure, say N. +source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" @@ -206,8 +207,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -source "net/netfilter/Kconfig" - endmenu endmenu diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 1d31b3a3f1e5..7982656b9c83 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -100,8 +100,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, continue; if (to->sat_addr.s_net == ATADDR_ANYNET && - to->sat_addr.s_node == ATADDR_BCAST && - at->src_net == atif->address.s_net) + to->sat_addr.s_node == ATADDR_BCAST) goto found; if (to->sat_addr.s_net == at->src_net && @@ -1443,8 +1442,10 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, else atif = atalk_find_interface(ddp->deh_dnet, ddp->deh_dnode); - /* Not ours, so we route the packet via the correct AppleTalk iface */ if (!atif) { + /* Not ours, so we route the packet via the correct + * AppleTalk iface + */ atalk_route_packet(skb, dev, ddp, &ddphv, origlen); goto out; } @@ -1592,9 +1593,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (usat->sat_addr.s_net || usat->sat_addr.s_node == ATADDR_ANYNODE) { rt = atrtr_find(&usat->sat_addr); - if (!rt) - return -ENETUNREACH; - dev = rt->dev; } else { struct atalk_addr at_hint; @@ -1603,11 +1601,12 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr at_hint.s_net = at->src_net; rt = atrtr_find(&at_hint); - if (!rt) - return -ENETUNREACH; - dev = rt->dev; } + if (!rt) + return -ENETUNREACH; + + dev = rt->dev; SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", sk, size, dev->name); @@ -1677,6 +1676,20 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk); /* loop back */ skb_orphan(skb); + if (ddp->deh_dnode == ATADDR_BCAST) { + struct atalk_addr at_lo; + + at_lo.s_node = 0; + at_lo.s_net = 0; + + rt = atrtr_find(&at_lo); + if (!rt) { + kfree_skb(skb); + return -ENETUNREACH; + } + dev = rt->dev; + skb->dev = dev; + } ddp_dl->request(ddp_dl, skb, dev->dev_addr); } else { SOCK_DEBUG(sk, "SK %p: send out.\n", sk); diff --git a/net/atm/addr.c b/net/atm/addr.c index 1c8867f7f54a..3060fd0ba4b9 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -44,29 +44,43 @@ static void notify_sigd(struct atm_dev *dev) sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL); } -void atm_reset_addr(struct atm_dev *dev) +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this, *p; + struct list_head *head; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry_safe(this, p, &dev->local, entry) - kfree(this); + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry_safe(this, p, head, entry) { + list_del(&this->entry); + kfree(this); + } spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); } -int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { spin_unlock_irqrestore(&dev->lock, flags); return -EEXIST; @@ -78,28 +92,36 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) return -ENOMEM; } this->addr = *addr; - list_add(&this->entry, &dev->local); + list_add(&this->entry, head); spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } -int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) { + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) { if (identical(&this->addr, addr)) { list_del(&this->entry); spin_unlock_irqrestore(&dev->lock, flags); kfree(this); - notify_sigd(dev); + if (head == &dev->local) + notify_sigd(dev); return 0; } } @@ -108,22 +130,27 @@ int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) } int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf, - size_t size) + size_t size, enum atm_addr_type_t atype) { unsigned long flags; struct atm_dev_addr *this; + struct list_head *head; int total = 0, error; struct sockaddr_atmsvc *tmp_buf, *tmp_bufp; spin_lock_irqsave(&dev->lock, flags); - list_for_each_entry(this, &dev->local, entry) + if (atype == ATM_ADDR_LECS) + head = &dev->lecs; + else + head = &dev->local; + list_for_each_entry(this, head, entry) total += sizeof(struct sockaddr_atmsvc); tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC); if (!tmp_buf) { spin_unlock_irqrestore(&dev->lock, flags); return -ENOMEM; } - list_for_each_entry(this, &dev->local, entry) + list_for_each_entry(this, head, entry) memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc)); spin_unlock_irqrestore(&dev->lock, flags); error = total > size ? -E2BIG : total; diff --git a/net/atm/addr.h b/net/atm/addr.h index 3099d21feeaa..f39433ad45da 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -9,10 +9,12 @@ #include <linux/atm.h> #include <linux/atmdev.h> - -void atm_reset_addr(struct atm_dev *dev); -int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr); -int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,size_t size); +void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type); +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr, + enum atm_addr_type_t type); +int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf, + size_t size, enum atm_addr_type_t type); #endif diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index b2113c3454ae..223c7ad5bd0f 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -25,7 +25,7 @@ int atm_charge(struct atm_vcc *vcc,int truesize) struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size, - int gfp_flags) + gfp_t gfp_flags) { struct sock *sk = sk_atm(vcc); int guess = atm_guess_pdu2truesize(pdu_size); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 289956c4dd3e..72f3f7b8de80 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -220,7 +220,7 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) /* netif_stop_queue(dev); */ dev_kfree_skb(skb); read_unlock(&devs_lock); - return -EUNATCH; + return 0; } if (!br2684_xmit_vcc(skb, brdev, brvcc)) { /* diff --git a/net/atm/clip.c b/net/atm/clip.c index 28dab55a4387..4f54c9a5e84a 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -310,7 +310,7 @@ static int clip_constructor(struct neighbour *neigh) if (neigh->type != RTN_UNICAST) return -EINVAL; rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return -EINVAL; diff --git a/net/atm/common.c b/net/atm/common.c index e93e838069e8..63feea49fb13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -46,7 +46,7 @@ static void __vcc_insert_socket(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)]; - sk->sk_hashent = vcc->vci & (VCC_HTABLE_SIZE - 1); + sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1); sk_add_node(sk, head); } @@ -178,8 +178,6 @@ static void vcc_destroy_socket(struct sock *sk) if (vcc->push) vcc->push(vcc, NULL); /* atmarpd has no push */ - vcc_remove_socket(sk); /* no more receive */ - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc,skb->truesize); kfree_skb(skb); @@ -188,6 +186,8 @@ static void vcc_destroy_socket(struct sock *sk) module_put(vcc->dev->ops->owner); atm_dev_put(vcc->dev); } + + vcc_remove_socket(sk); } diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index d89056ec44d4..a150198b05a3 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -105,17 +105,35 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (!error) sock->state = SS_CONNECTED; goto done; - default: + case ATM_SETBACKEND: + case ATM_NEWBACKENDIF: + { + atm_backend_t backend; + error = get_user(backend, (atm_backend_t __user *) argp); + if (error) + goto done; + switch (backend) { + case ATM_BACKEND_PPP: + request_module("pppoatm"); + break; + case ATM_BACKEND_BR2684: + request_module("br2684"); + break; + } + } + break; + case ATMMPC_CTRL: + case ATMMPC_DATA: + request_module("mpoa"); + break; + case ATMARPD_CTRL: + request_module("clip"); + break; + case ATMLEC_CTRL: + request_module("lec"); break; } - if (cmd == ATMMPC_CTRL || cmd == ATMMPC_DATA) - request_module("mpoa"); - if (cmd == ATMARPD_CTRL) - request_module("clip"); - if (cmd == ATMLEC_CTRL) - request_module("lec"); - error = -ENOIOCTLCMD; down(&ioctl_mutex); diff --git a/net/atm/lec.c b/net/atm/lec.c index a0752487026d..ad840b9afba8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -686,9 +686,19 @@ static unsigned char lec_ctrl_magic[] = { 0x01, 0x01 }; +#define LEC_DATA_DIRECT_8023 2 +#define LEC_DATA_DIRECT_8025 3 + +static int lec_is_data_direct(struct atm_vcc *vcc) +{ + return ((vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8023) || + (vcc->sap.blli[0].l3.tr9577.snap[4] == LEC_DATA_DIRECT_8025)); +} + static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) { + unsigned long flags; struct net_device *dev = (struct net_device *)vcc->proto_data; struct lec_priv *priv = (struct lec_priv *)dev->priv; @@ -728,7 +738,8 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); } else { /* Data frame, queue to protocol handlers */ - unsigned char *dst; + struct lec_arp_table *entry; + unsigned char *src, *dst; atm_return(vcc,skb->truesize); if (*(uint16_t *)skb->data == htons(priv->lecid) || @@ -741,10 +752,30 @@ lec_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } #ifdef CONFIG_TR - if (priv->is_trdev) dst = ((struct lecdatahdr_8025 *)skb->data)->h_dest; + if (priv->is_trdev) + dst = ((struct lecdatahdr_8025 *) skb->data)->h_dest; else #endif - dst = ((struct lecdatahdr_8023 *)skb->data)->h_dest; + dst = ((struct lecdatahdr_8023 *) skb->data)->h_dest; + + /* If this is a Data Direct VCC, and the VCC does not match + * the LE_ARP cache entry, delete the LE_ARP cache entry. + */ + spin_lock_irqsave(&priv->lec_arp_lock, flags); + if (lec_is_data_direct(vcc)) { +#ifdef CONFIG_TR + if (priv->is_trdev) + src = ((struct lecdatahdr_8025 *) skb->data)->h_source; + else +#endif + src = ((struct lecdatahdr_8023 *) skb->data)->h_source; + entry = lec_arp_find(priv, src); + if (entry && entry->vcc != vcc) { + lec_arp_remove(priv, entry); + kfree(entry); + } + } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if (!(dst[0]&0x01) && /* Never filter Multi/Broadcast */ !priv->is_proxy && /* Proxy wants all the packets */ @@ -1990,6 +2021,12 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, found = entry->vcc; goto out; } + /* If the LE_ARP cache entry is still pending, reset count to 0 + * so another LE_ARP request can be made for this frame. + */ + if (entry->status == ESI_ARP_PENDING) { + entry->no_tries = 0; + } /* Data direct VC not yet set up, check to see if the unknown frame count is greater than the limit. If the limit has not been reached, allow the caller to send packet to diff --git a/net/atm/resources.c b/net/atm/resources.c index a57a9268bd24..415d2615d475 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -40,6 +40,7 @@ static struct atm_dev *__alloc_atm_dev(const char *type) dev->link_rate = ATM_OC3_PCR; spin_lock_init(&dev->lock); INIT_LIST_HEAD(&dev->local); + INIT_LIST_HEAD(&dev->lecs); return dev; } @@ -320,10 +321,12 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EPERM; goto done; } - atm_reset_addr(dev); + atm_reset_addr(dev, ATM_ADDR_LOCAL); break; case ATM_ADDADDR: case ATM_DELADDR: + case ATM_ADDLECSADDR: + case ATM_DELLECSADDR: if (!capable(CAP_NET_ADMIN)) { error = -EPERM; goto done; @@ -335,14 +338,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) error = -EFAULT; goto done; } - if (cmd == ATM_ADDADDR) - error = atm_add_addr(dev, &addr); + if (cmd == ATM_ADDADDR || cmd == ATM_ADDLECSADDR) + error = atm_add_addr(dev, &addr, + (cmd == ATM_ADDADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); else - error = atm_del_addr(dev, &addr); + error = atm_del_addr(dev, &addr, + (cmd == ATM_DELADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); goto done; } case ATM_GETADDR: - error = atm_get_addr(dev, buf, len); + case ATM_GETLECSADDR: + error = atm_get_addr(dev, buf, len, + (cmd == ATM_GETADDR ? + ATM_ADDR_LOCAL : ATM_ADDR_LECS)); if (error < 0) goto done; size = error; diff --git a/net/atm/signaling.c b/net/atm/signaling.c index f7c449ac1800..e7211a7f382c 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,8 +217,9 @@ void sigd_enq(struct atm_vcc *vcc,enum atmsvc_msg_type type, static void purge_vcc(struct atm_vcc *vcc) { if (sk_atm(vcc)->sk_family == PF_ATMSVC && - !test_bit(ATM_VF_META,&vcc->flags)) { - set_bit(ATM_VF_RELEASED,&vcc->flags); + !test_bit(ATM_VF_META, &vcc->flags)) { + set_bit(ATM_VF_RELEASED, &vcc->flags); + clear_bit(ATM_VF_REGIS, &vcc->flags); vcc_release_async(vcc, -EUNATCH); } } @@ -243,8 +244,7 @@ static void sigd_close(struct atm_vcc *vcc) sk_for_each(s, node, head) { struct atm_vcc *vcc = atm_sk(s); - if (vcc->dev) - purge_vcc(vcc); + purge_vcc(vcc); } } read_unlock(&vcc_sklist_lock); diff --git a/net/atm/svc.c b/net/atm/svc.c index 08e46052a3e4..d7b266136bf6 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -302,6 +302,7 @@ static int svc_listen(struct socket *sock,int backlog) error = -EINVAL; goto out; } + vcc_insert_socket(sk); set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 810c9c76c2e0..73cfc3411c46 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -123,7 +123,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) } skb_pull(skb, 1); /* Remove PID */ - skb->h.raw = skb->data; + skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d3d6bc547212..59b2dd36baa7 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -372,7 +372,7 @@ static struct proto l2cap_proto = { .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 173f46e8cdae..35adce6482b6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -229,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->rx_credits = RFCOMM_DEFAULT_CREDITS; } -struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio) +struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) { struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); if (!d) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f49e7e938bfb..a2b30f0aedb7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -284,7 +284,7 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 1bca860a6109..158a9c46d863 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de skb->destructor = rfcomm_wfree; } -static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority) +static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, gfp_t priority) { if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { struct sk_buff *skb = alloc_skb(size, priority); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ce7ab7dfa0b2..997e42df115c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -418,7 +418,7 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) +static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 069253f830c1..2d24fb400e0c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,7 +31,8 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - if (skb->len > skb->dev->mtu) + /* drop mtu oversized packets except tso */ + if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 91bb895375f4..defcf6a8607c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -79,7 +79,6 @@ static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; - dev->br_port = NULL; p->br = NULL; p->dev = NULL; dev_put(dev); @@ -100,6 +99,7 @@ static void del_nbp(struct net_bridge_port *p) struct net_bridge *br = p->br; struct net_device *dev = p->dev; + dev->br_port = NULL; dev_set_promiscuity(dev, -1); spin_lock_bh(&br->lock); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2d52fee63a8c..d8e36b775125 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) .tos = RT_TOS(iph->tos)} }, .proto = 0}; if (!ip_route_output_key(&rt, &fl)) { - /* Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. */ - if (((struct dst_entry *)rt)->dev == dev) { + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. + * - Deal with redirected traffic. */ + if (((struct dst_entry *)rt)->dev == dev || + rt->rt_type == RTN_LOCAL) { skb->dst = (struct dst_entry *)rt; goto bridged_dnat; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c4540144f0f4..f8ffbf6e2333 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include <linux/spinlock.h> #include <asm/uaccess.h> #include <linux/smp.h> +#include <linux/cpumask.h> #include <net/sock.h> /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -823,10 +824,11 @@ static int translate_table(struct ebt_replace *repl, /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ newinfo->chainstack = (struct ebt_chainstack **) - vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + vmalloc((highest_possible_processor_id()+1) + * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack) return -ENOMEM; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { newinfo->chainstack[i] = vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); if (!newinfo->chainstack[i]) { @@ -895,9 +897,12 @@ static void get_counters(struct ebt_counter *oldcounters, /* counters of cpu 0 */ memcpy(counters, oldcounters, - sizeof(struct ebt_counter) * nentries); + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ - for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { + if (cpu == 0) + continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) { counters[i].pcnt += counter_base[i].pcnt; @@ -929,7 +934,8 @@ static int do_replace(void __user *user, unsigned int len) BUGPRINT("Entries_size never zero\n"); return -EINVAL; } - countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(tmp.nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); if (!newinfo) @@ -1022,7 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) vfree(table->entries); if (table->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1040,7 +1046,7 @@ free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1132,7 +1138,8 @@ int ebt_register_table(struct ebt_table *table) return -EINVAL; } - countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + countersize = COUNTER_OFFSET(table->table->nentries) * + (highest_possible_processor_id()+1); newinfo = (struct ebt_table_info *) vmalloc(sizeof(struct ebt_table_info) + countersize); ret = -ENOMEM; @@ -1186,7 +1193,7 @@ free_unlock: up(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) up(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { - for (i = 0; i < num_possible_cpus(); i++) + for_each_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } diff --git a/net/core/datagram.c b/net/core/datagram.c index da9bf71421a7..81987df536eb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -211,74 +211,45 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to, int len) { - int start = skb_headlen(skb); - int i, copy = start - offset; - - /* Copy header. */ - if (copy > 0) { - if (copy > len) - copy = len; - if (memcpy_toiovec(to, skb->data + offset, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } + int i, err, fraglen, end = 0; + struct sk_buff *next = skb_shinfo(skb)->frag_list; +next_skb: + fraglen = skb_headlen(skb); + i = -1; - /* Copy paged appendix. Hmm... why does this look so complicated? */ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); + while (1) { + int start = end; - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - int err; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + if ((end += fraglen) > offset) { + int copy = end - offset, o = offset - start; if (copy > len) copy = len; - vaddr = kmap(page); - err = memcpy_toiovec(to, vaddr + frag->page_offset + - offset - start, copy); - kunmap(page); + if (i == -1) + err = memcpy_toiovec(to, skb->data + o, copy); + else { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + void *p = kmap(page) + frag->page_offset + o; + err = memcpy_toiovec(to, p, copy); + kunmap(page); + } if (err) goto fault; if (!(len -= copy)) return 0; offset += copy; } - start = end; + if (++i >= skb_shinfo(skb)->nr_frags) + break; + fraglen = skb_shinfo(skb)->frags[i].size; } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_datagram_iovec(list, - offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } + if (next) { + skb = next; + BUG_ON(skb_shinfo(skb)->frag_list); + next = skb->next; + goto next_skb; } - if (!len) - return 0; - fault: return -EFAULT; } diff --git a/net/core/dev.c b/net/core/dev.c index c01511e3d0c1..a44eeef24edf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -574,6 +574,8 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) return dev; } +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; @@ -1130,7 +1132,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #endif /* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) +int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) { unsigned int size; u8 *data; @@ -1257,6 +1259,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; + spin_lock_prefetch(&dev->queue_lock); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 39fc55edf691..4128fc76ac3a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -61,7 +61,9 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); static struct neigh_table *neigh_tables; +#ifdef CONFIG_PROC_FS static struct file_operations neigh_stat_seq_fops; +#endif /* Neighbour hash table buckets are protected with rwlock tbl->lock. @@ -725,6 +727,13 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } +static inline void neigh_add_timer(struct neighbour *n, unsigned long when) +{ + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); + } +} /* Called when a timer expires for a neighbour entry. */ @@ -809,8 +818,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); if (time_before(next, jiffies + HZ/2)) next = jiffies + HZ/2; - neigh->timer.expires = next; - add_timer(&neigh->timer); + neigh_add_timer(neigh, next); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { struct sk_buff *skb = skb_peek(&neigh->arp_queue); @@ -852,8 +860,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh_hold(neigh); - neigh->timer.expires = now + 1; - add_timer(&neigh->timer); + neigh_add_timer(neigh, now + 1); } else { neigh->nud_state = NUD_FAILED; write_unlock_bh(&neigh->lock); @@ -866,8 +873,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); neigh_hold(neigh); neigh->nud_state = NUD_DELAY; - neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; - add_timer(&neigh->timer); + neigh_add_timer(neigh, + jiffies + neigh->parms->delay_probe_time); } if (neigh->nud_state == NUD_INCOMPLETE) { @@ -1013,10 +1020,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_del_timer(neigh); if (new & NUD_IN_TIMER) { neigh_hold(neigh); - neigh->timer.expires = jiffies + + neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? - neigh->parms->reachable_time : 0); - add_timer(&neigh->timer); + neigh->parms->reachable_time : + 0))); } neigh->nud_state = new; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5265dfd69928..802fe11efad0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -703,7 +703,7 @@ int netpoll_setup(struct netpoll *np) if (!np->local_ip) { rcu_read_lock(); - in_dev = __in_dev_get(ndev); + in_dev = __in_dev_get_rcu(ndev); if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ef430b1e8e42..5f043d346694 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -186,7 +186,7 @@ /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" +#define PG_PROC_DIR "net/pktgen" #define MAX_CFLOWS 65536 @@ -1476,18 +1476,7 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, static int create_proc_dir(void) { - int len; - /* does proc_dir already exists */ - len = strlen(PG_PROC_DIR); - - for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { - if ((pg_proc_dir->namelen == len) && - (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) - break; - } - - if (!pg_proc_dir) - pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, NULL); if (!pg_proc_dir) return -ENODEV; @@ -1497,7 +1486,7 @@ static int create_proc_dir(void) static int remove_proc_dir(void) { - remove_proc_entry(PG_PROC_DIR, proc_net); + remove_proc_entry(PG_PROC_DIR, NULL); return 0; } @@ -1678,13 +1667,12 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); + in_dev = __in_dev_get_rcu(pkt_dev->odev); if (in_dev) { if (in_dev->ifa_list) { pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; pkt_dev->saddr_max = pkt_dev->saddr_min; } - __in_dev_put(in_dev); } rcu_read_unlock(); } @@ -2908,7 +2896,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) pkt_dev->udp_dst_max = 9; strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname); + sprintf(pkt_dev->fname, "%s/%s", PG_PROC_DIR, ifname); if (! pktgen_setup_dev(pkt_dev)) { printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); @@ -2981,7 +2969,7 @@ static int pktgen_create_thread(const char* name, int cpu) spin_lock_init(&t->if_lock); t->cpu = cpu; - sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name); + sprintf(t->fname, "%s/%s", PG_PROC_DIR, t->name); t->proc_ent = create_proc_entry(t->fname, 0600, NULL); if (!t->proc_ent) { printk("pktgen: cannot create %s procfs entry.\n", t->fname); @@ -3064,7 +3052,7 @@ static int __init pg_init(void) create_proc_dir(); - sprintf(module_fname, "net/%s/pgctrl", PG_PROC_DIR); + sprintf(module_fname, "%s/pgctrl", PG_PROC_DIR); module_proc_ent = create_proc_entry(module_fname, 0600, NULL); if (!module_proc_ent) { printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f80a28785610..af9b1516e21f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,8 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -struct timeval __read_mostly skb_tv_base; - /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -132,7 +130,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, +struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { struct sk_buff *skb; @@ -200,7 +198,7 @@ nodata: */ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { struct sk_buff *skb; u8 *data; @@ -363,7 +361,7 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. */ -struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; @@ -502,7 +500,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -541,7 +539,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_ma * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -600,7 +598,7 @@ out: */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { int i; u8 *data; @@ -691,7 +689,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, - unsigned int __nocast gfp_mask) + gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -1708,8 +1706,6 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_fclone_cache) panic("cannot create skbuff cache"); - - do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1743,4 +1739,3 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_tv_base); diff --git a/net/core/sock.c b/net/core/sock.c index ac63b56e23b2..1c52fe809eda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -637,7 +637,7 @@ lenout: * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, unsigned int __nocast priority, +struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -660,16 +660,20 @@ struct sock *sk_alloc(int family, unsigned int __nocast priority, sock_lock_init(sk); } - if (security_sk_alloc(sk, family, priority)) { - if (slab != NULL) - kmem_cache_free(slab, sk); - else - kfree(sk); - sk = NULL; - } else - __module_get(prot->owner); + if (security_sk_alloc(sk, family, priority)) + goto out_free; + + if (!try_module_get(prot->owner)) + goto out_free; } return sk; + +out_free: + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); + return NULL; } void sk_free(struct sock *sk) @@ -700,7 +704,7 @@ void sk_free(struct sock *sk) module_put(owner); } -struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority) +struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); @@ -841,7 +845,7 @@ unsigned long sock_i_ino(struct sock *sk) * Allocate a skb from the socket's send buffer. */ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff * skb = alloc_skb(size, priority); @@ -857,7 +861,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, * Allocate a skb from the socket's receive buffer. */ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority) + gfp_t priority) { if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { struct sk_buff *skb = alloc_skb(size, priority); @@ -872,7 +876,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, /* * Allocate a memory block from the socket's option memory buffer. */ -void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority) +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { if ((unsigned)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { diff --git a/net/dccp/Makefile b/net/dccp/Makefile index fb97bb042455..344a8da153fc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o +dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o + obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c new file mode 100644 index 000000000000..c9a62cca22fc --- /dev/null +++ b/net/dccp/ackvec.c @@ -0,0 +1,419 @@ +/* + * net/dccp/ackvec.c + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License; + */ + +#include "ackvec.h" +#include "dccp.h" + +#include <linux/dccp.h> +#include <linux/skbuff.h> + +#include <net/sock.h> + +int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + int len = av->dccpav_vec_len + 2; + struct timeval now; + u32 elapsed_time; + unsigned char *to, *from; + + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = av->dccpav_vec_len; + from = av->dccpav_buf + av->dccpav_buf_head; + + /* Check if buf_head wraps */ + if (av->dccpav_buf_head + len > av->dccpav_vec_len) { + const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = av->dccpav_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. + * + * This implemention uses just one ack record for now. + */ + av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + av->dccpav_ack_ptr = av->dccpav_buf_head; + av->dccpav_ack_ackno = av->dccpav_buf_ackno; + av->dccpav_ack_nonce = av->dccpav_buf_nonce; + av->dccpav_sent_len = av->dccpav_vec_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + debug_prefix, av->dccpav_sent_len, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + return -1; +} + +struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, + const gfp_t priority) +{ + struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); + + if (av != NULL) { + av->dccpav_buf_len = len; + av->dccpav_buf_head = + av->dccpav_buf_tail = av->dccpav_buf_len - 1; + av->dccpav_buf_ackno = + av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; + av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; + av->dccpav_ack_ptr = 0; + av->dccpav_time.tv_sec = 0; + av->dccpav_time.tv_usec = 0; + av->dccpav_sent_len = av->dccpav_vec_len = 0; + } + + return av; +} + +void dccp_ackvec_free(struct dccp_ackvec *av) +{ + kfree(av); +} + +static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; +} + +static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, + const unsigned int index) +{ + return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (av->dccpav_vec_len + packets > av->dccpav_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = av->dccpav_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += av->dccpav_buf_len; + } + + av->dccpav_buf_head = new_head; + + if (gap > 0) + memset(av->dccpav_buf + av->dccpav_buf_head + 1, + DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); + + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpav_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. + */ + + /* See if this is the first ackno being inserted */ + if (av->dccpav_vec_len == 0) { + av->dccpav_buf[av->dccpav_buf_head] = state; + av->dccpav_vec_len = 1; + } else if (after48(ackno, av->dccpav_buf_ackno)) { + const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, + ackno); + + /* + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackvec_state(av, av->dccpav_buf_head) == state && + (dccp_ackvec_len(av, av->dccpav_buf_head) < + DCCP_ACKVEC_LEN_MASK)) + av->dccpav_buf[av->dccpav_buf_head]++; + else if (dccp_ackvec_set_buf_head_state(av, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); + unsigned int index = av->dccpav_buf_head; + + while (1) { + const u8 len = dccp_ackvec_len(av, index); + const u8 state = dccp_ackvec_state(av, index); + /* + * valid packets not yet in dccpav_buf have a reserved + * entry, with a len equal to 0. + */ + if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long)ackno); + av->dccpav_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == av->dccpav_buf_len) + index = 0; + } + } + + av->dccpav_buf_ackno = ackno; + dccp_timestamp(sk, &av->dccpav_time); +out: + dccp_pr_debug(""); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long)ackno); + return -EILSEQ; +} + +#ifdef CONFIG_IP_DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long)ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackvec_print(const struct dccp_ackvec *av) +{ + dccp_ackvector_print(av->dccpav_buf_ackno, + av->dccpav_buf + av->dccpav_buf_head, + av->dccpav_vec_len); +} +#endif + +static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) +{ + /* + * As we're keeping track of the ack vector size (dccpav_vec_len) and + * the sent ack vector size (dccpav_sent_len) we don't need + * dccpav_buf_tail at all, but keep this code here as in the future + * we'll implement a vector of ack records, as suggested in + * draft-ietf-dccp-spec-11.txt Appendix A. -acme + */ +#if 0 + av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; + if (av->dccpav_buf_tail >= av->dccpav_vec_len) + av->dccpav_buf_tail -= av->dccpav_vec_len; +#endif + av->dccpav_vec_len -= av->dccpav_sent_len; +} + +void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, + const u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == av->dccpav_ack_seqno) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + (unsigned long long)av->dccpav_ack_seqno, + (unsigned long long)av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're + * not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ + if (before48(ackno, av->dccpav_ack_seqno)) { + /* dccp_pr_debug_cat("yes\n"); */ + return; + } + /* dccp_pr_debug_cat("no\n"); */ + + i = len; + while (i--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * av->dccpav_ack_seqno, ackno); + */ + if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ + + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + debug_prefix, len, + (unsigned long long) + av->dccpav_ack_seqno, + (unsigned long long) + av->dccpav_ack_ackno); + dccp_ackvec_trow_away_ack_record(av); + } + /* + * If dccpav_ack_seqno was not received, no problem + * we'll send another ACK vector. + */ + av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + /* dccp_pr_debug_cat("no\n"); */ + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} + +int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + if (len > DCCP_MAX_ACKVEC_LEN) + return -1; + + /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ + dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + return 0; +} diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h new file mode 100644 index 000000000000..d0fd6c60c574 --- /dev/null +++ b/net/dccp/ackvec.h @@ -0,0 +1,133 @@ +#ifndef _ACKVEC_H +#define _ACKVEC_H +/* + * net/dccp/ackvec.h + * + * An implementation of the DCCP protocol + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/compiler.h> +#include <linux/time.h> +#include <linux/types.h> + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACKVEC_LEN 253 + +#define DCCP_ACKVEC_STATE_RECEIVED 0 +#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackvec - ack vector + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpav_buf_head - circular buffer head + * @dccpav_buf_tail - circular buffer tail + * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpav_buf_head) + * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpav_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) + * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpav_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) + * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpav_buf_len - circular buffer length + * @dccpav_time - the time in usecs + * @dccpav_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackvec { + unsigned int dccpav_buf_head; + unsigned int dccpav_buf_tail; + u64 dccpav_buf_ackno; + u64 dccpav_ack_seqno; + u64 dccpav_ack_ackno; + unsigned int dccpav_ack_ptr; + unsigned int dccpav_sent_len; + unsigned int dccpav_vec_len; + unsigned int dccpav_buf_len; + struct timeval dccpav_time; + u8 dccpav_buf_nonce; + u8 dccpav_ack_nonce; + u8 dccpav_buf[0]; +}; + +struct sock; +struct sk_buff; + +#ifdef CONFIG_IP_DCCP_ACKVEC +extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const gfp_t priority); +extern void dccp_ackvec_free(struct dccp_ackvec *av); + +extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state); + +extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno); +extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len); + +extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return av->dccpav_sent_len != av->dccpav_vec_len; +} +#else /* CONFIG_IP_DCCP_ACKVEC */ +static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, + const gfp_t priority) +{ + return NULL; +} + +static inline void dccp_ackvec_free(struct dccp_ackvec *av) +{ +} + +static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + return -1; +} + +static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno) +{ +} + +static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u8 opt, const u8 *value, const u8 len) +{ + return -1; +} + +static inline int dccp_insert_option_ackvec(const struct sock *sk, + const struct sk_buff *skb) +{ + return -1; +} + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return 0; +} +#endif /* CONFIG_IP_DCCP_ACKVEC */ +#endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 962f1e9e2f7e..c37eeeaf5c6e 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -14,6 +14,7 @@ */ #include <net/sock.h> +#include <linux/compiler.h> #include <linux/dccp.h> #include <linux/list.h> #include <linux/module.h> @@ -54,6 +55,14 @@ struct ccid { struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); + int (*ccid_hc_rx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); + int (*ccid_hc_tx_getsockopt)(struct sock *sk, + const int optname, int len, + u32 __user *optval, + int __user *optlen); }; extern int ccid_register(struct ccid *ccid); @@ -101,14 +110,14 @@ static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_rx_exit != NULL && + if (ccid != NULL && ccid->ccid_hc_rx_exit != NULL && dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) ccid->ccid_hc_rx_exit(sk); } static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) { - if (ccid->ccid_hc_tx_exit != NULL && + if (ccid != NULL && ccid->ccid_hc_tx_exit != NULL && dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) ccid->ccid_hc_tx_exit(sk); } @@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_tx_get_info != NULL) ccid->ccid_hc_tx_get_info(sk, info); } + +static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_rx_getsockopt != NULL) + rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} + +static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, + const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + int rc = -ENOPROTOOPT; + if (ccid->ccid_hc_tx_getsockopt != NULL) + rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +} #endif /* _CCID_H */ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 38aa84986118..aa68e0ab274d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = hctx->ccid3hctx_rtt; } +static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_RX_INFO: + if (len < sizeof(hcrx->ccid3hcrx_tfrc)) + return -EINVAL; + len = sizeof(hcrx->ccid3hcrx_tfrc); + val = &hcrx->ccid3hcrx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + +static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + u32 __user *optval, int __user *optlen) +{ + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const void *val; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + switch (optname) { + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(hctx->ccid3hctx_tfrc)) + return -EINVAL; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, val, len)) + return -EFAULT; + + return 0; +} + static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -1139,6 +1193,8 @@ static struct ccid ccid3 = { .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, + .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, + .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; module_param(ccid3_debug, int, 0444); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index eb248778eea3..0bde4583d091 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -40,6 +40,7 @@ #include <linux/list.h> #include <linux/time.h> #include <linux/types.h> +#include <linux/tfrc.h> #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -93,12 +94,15 @@ struct ccid3_options_received { * @ccid3hctx_hist - Packet history */ struct ccid3_hc_tx_sock { - u32 ccid3hctx_x; - u32 ccid3hctx_x_recv; - u32 ccid3hctx_x_calc; + struct tfrc_tx_info ccid3hctx_tfrc; +#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x +#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv +#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc +#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt +#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p +#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto +#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - u32 ccid3hctx_rtt; - u32 ccid3hctx_p; u8 ccid3hctx_state; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; @@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; - u32 ccid3hctx_t_rto; - u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; struct ccid3_hc_rx_sock { + struct tfrc_rx_info ccid3hcrx_tfrc; +#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv +#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt +#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - u32 ccid3hcrx_rtt; - u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; @@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock { u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; - u32 ccid3hcrx_x_recv; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 13ad47ba1420..417d9d82df3e 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -36,7 +36,7 @@ struct dccp_li_hist_entry { static inline struct dccp_li_hist_entry * dccp_li_hist_entry_new(struct dccp_li_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { return kmem_cache_alloc(hist->dccplih_slab, prio); } diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index b375ebdb7dcf..122e96737ff6 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -86,7 +86,7 @@ extern struct dccp_rx_hist_entry * static inline struct dccp_tx_hist_entry * dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -137,7 +137,7 @@ static inline struct dccp_rx_hist_entry * const struct sock *sk, const u32 ndp, const struct sk_buff *skb, - const unsigned int __nocast prio) + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630b3b18..5871c027f9dc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -17,6 +17,7 @@ #include <net/snmp.h> #include <net/sock.h> #include <net/tcp.h> +#include "ackvec.h" #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; @@ -258,13 +259,12 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type; - __u8 dccpd_reset_code; - __u8 dccpd_service; - __u8 dccpd_ccval; + __u8 dccpd_type:4; + __u8 dccpd_ccval:4; + __u8 dccpd_reset_code; + __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; - int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) @@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); } + +static inline int dccp_ack_pending(const struct sock *sk) +{ + const struct dccp_sock *dp = dccp_sk(sk); + return dp->dccps_timestamp_echo != 0 || +#ifdef CONFIG_IP_DCCP_ACKVEC + (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || +#endif + inet_csk_ack_scheduled(sk); +} extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, @@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, extern struct socket *dccp_ctl_socket; -#define DCCP_ACKPKTS_STATE_RECEIVED 0 -#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) - -#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ - -/** struct dccp_ackpkts - acknowledgeable packets - * - * This data structure is the one defined in the DCCP draft - * Appendix A. - * - * @dccpap_buf_head - circular buffer head - * @dccpap_buf_tail - circular buffer tail - * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %dccpap_buf_head) - * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: - * - * @dccpap_ack_seqno - the Sequence Number used for the packet - * (HC-Receiver seqno) - * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpap_ack_ackno - the Acknowledgement Number used for the packet - * (HC-Sender seqno) - * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. - * - * @dccpap_buf_len - circular buffer length - * @dccpap_time - the time in usecs - * @dccpap_buf - circular buffer of acknowledgeable packets - */ -struct dccp_ackpkts { - unsigned int dccpap_buf_head; - unsigned int dccpap_buf_tail; - u64 dccpap_buf_ackno; - u64 dccpap_ack_seqno; - u64 dccpap_ack_ackno; - unsigned int dccpap_ack_ptr; - unsigned int dccpap_buf_vector_len; - unsigned int dccpap_ack_vector_len; - unsigned int dccpap_buf_len; - struct timeval dccpap_time; - u8 dccpap_buf_nonce; - u8 dccpap_ack_nonce; - u8 dccpap_buf[0]; -}; - -extern struct dccp_ackpkts * - dccp_ackpkts_alloc(unsigned int len, - const unsigned int __nocast priority); -extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state); -extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno); - extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); static inline suseconds_t timeval_usecs(const struct timeval *tv) @@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -#ifdef CONFIG_IP_DCCP_DEBUG -extern void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, int len); -extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); -#else -static inline void dccp_ackvector_print(const u64 ackno, - const unsigned char *vector, - int len) { } -static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } -#endif - #endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index c74034cf7ede..3454d5941900 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -16,6 +16,7 @@ #include <net/sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_options.dccpo_send_ack_vector) - dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) @@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " - "packets buffer full!\n"); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - goto discard; - } - - /* - * FIXME: this activation is probably wrong, have to study more - * TCP delack machinery and how it fits into DCCP draft, but - * for now it kinda "works" 8) - */ - if (!inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); @@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } out_invalid_packet: - return 1; /* dccp_v4_do_rcv will send a reset, but... - FIXME: the reset code should be - DCCP_RESET_CODE_PACKET_ERROR */ + /* dccp_v4_do_rcv will send a reset */ + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -400,6 +375,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_RESET: inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); break; + case DCCP_PKT_DATA: + if (sk->sk_state == DCCP_RESPOND) + break; case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* @@ -418,7 +396,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_set_state(sk, DCCP_OPEN); - if (dh->dccph_type == DCCP_PKT_DATAACK) { + if (dh->dccph_type == DCCP_PKT_DATAACK || + dh->dccph_type == DCCP_PKT_DATA) { dccp_rcv_established(sk, skb, dh, len); queued = 1; /* packet was queued (by dccp_rcv_established) */ @@ -433,6 +412,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; @@ -473,7 +453,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dh->dccph_type == DCCP_PKT_RESET) goto discard; - /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + /* Caller (dccp_v4_do_rcv) will send Reset */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } @@ -487,36 +468,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ) + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - /* - * FIXME: check ECN to see if we should use - * DCCP_ACKPKTS_STATE_ECN_MARKED - */ - if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKPKTS_STATE_RECEIVED)) - goto discard; - /* - * FIXME: this activation is probably wrong, have to - * study more TCP delack machinery and how it fits into - * DCCP draft, but for now it kinda "works" 8) - */ - if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == - DCCP_MAX_SEQNO + 1) && - !inet_csk_ack_scheduled(sk)) { - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MIN, - DCCP_RTO_MAX); - } - } + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; } /* @@ -551,8 +513,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNC); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); @@ -563,13 +524,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNCACK); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); goto discard; } switch (sk->sk_state) { case DCCP_CLOSED: + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2afaa464e7f0..ae088d1347af 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -23,6 +23,7 @@ #include <net/tcp_states.h> #include <net/xfrm.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -61,27 +62,27 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, const int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, - dccp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); const struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } tw = NULL; /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -89,7 +90,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); @@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; + if (dccp_service_not_initialized(sk)) + return -EPROTO; + if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} + int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; goto drop; + } + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -735,6 +758,7 @@ drop_and_free: __reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; return -1; } @@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); discard: kfree_skb(skb); @@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; dh = dccp_hdr(skb); -#if 0 - /* - * Use something like this to simulate some DATA/DATAACK loss to test - * dccp_ackpkts_add, you'll get something like this on a session that - * sends 10 DATA/DATAACK packets: - * - * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| - * - * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet - * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets - * with the same state - * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet - * - * So... - * - * 281473596467422 was received - * 281473596467421 was not received - * 281473596467420 was received - * 281473596467419 was not received - * 281473596467418 was received - * 281473596467417 was not received - * 281473596467416 was received - * 281473596467415 was not received - * 281473596467414 was received - * 281473596467413 was received (this one was the 3way handshake - * RESPONSE) - * - */ - if (dh->dccph_type == DCCP_PKT_DATA || - dh->dccph_type == DCCP_PKT_DATAACK) { - static int discard = 0; - if (discard) { - discard = 0; - goto discard_it; - } - discard = 1; - } -#endif DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; @@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk) do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_KERNEL); - - if (dp->dccps_hc_rx_ackpkts == NULL) + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_KERNEL); + if (dp->dccps_hc_rx_ackvec == NULL) return -ENOMEM; } @@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } @@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; + dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + if (dp->dccps_service_list != NULL) { + kfree(dp->dccps_service_list); + dp->dccps_service_list = NULL; + } + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); - dp->dccps_hc_rx_ackpkts = NULL; + if (dp->dccps_options.dccpo_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc04cbe..1393461898bb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -19,6 +19,7 @@ #include <net/xfrm.h> #include <net/inet_timewait_sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); - newdp->dccps_hc_rx_ackpkts = NULL; - newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_role = DCCP_ROLE_SERVER; + newdp->dccps_hc_rx_ackvec = NULL; + newdp->dccps_service_list = NULL; + newdp->dccps_service = dreq->dreq_service; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackpkts = - dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackvec = + dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, + GFP_ATOMIC); /* * XXX: We're using the same CCIDs set on the parent, * i.e. sk_clone copied the master sock and left the * CCID pointers for this child, that is why we do the * __ccid_get calls. */ - if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) goto out_free; } @@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newsk) != 0 || ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { - dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); out_free: diff --git a/net/dccp/options.c b/net/dccp/options.c index d4c4242d8dd7..0a76426c9aea 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -18,19 +18,15 @@ #include <linux/kernel.h> #include <linux/skbuff.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, - const u64 ackno, - const unsigned char len, - const unsigned char *vector); - /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, - .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_rx_ccid = DCCPF_INITIAL_CCID, + .dccpo_tx_ccid = DCCPF_INITIAL_CCID, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; @@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: - if (len > DCCP_MAX_ACK_VECTOR_LEN) - goto out_invalid_option; - + case DCCPO_ACK_VECTOR_1: if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_ack_vector_len = len; - opt_recv->dccpor_ack_vector_idx = value - options; - - dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", - debug_prefix, len, - (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, - value, len); - dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, - sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); + if (dp->dccps_options.dccpo_send_ack_vector && + dccp_ackvec_parse(sk, skb, opt, value, len)) + goto out_invalid_option; break; case DCCPO_TIMESTAMP: if (len != 4) @@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); -#ifdef CONFIG_IP_DCCP_DEBUG - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT TX opt: " : "server TX opt: "; -#endif - struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - int len = ap->dccpap_buf_vector_len + 2; - struct timeval now; - u32 elapsed_time; - unsigned char *to, *from; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; - - if (elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " - "insert ACK Vector!\n"); - return; - } - - /* - * XXX: now we have just one ack vector sent record, so - * we have to wait for it to be cleared. - * - * Of course this is not acceptable, but this is just for - * basic testing now. - */ - if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) - return; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - - len = ap->dccpap_buf_vector_len; - from = ap->dccpap_buf + ap->dccpap_buf_head; - - /* Check if buf_head wraps */ - if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { - const unsigned int tailsize = (ap->dccpap_buf_len - - ap->dccpap_buf_head); - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = ap->dccpap_buf; - } - - memcpy(to, from, len); - /* - * From draft-ietf-dccp-spec-11.txt: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. - * - * This implemention uses just one ack record for now. - */ - ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - ap->dccpap_ack_ptr = ap->dccpap_buf_head; - ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; - ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; - ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; - - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - debug_prefix, ap->dccpap_ack_vector_len, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); -} - void dccp_timestamp(const struct sock *sk, struct timeval *tv) { const struct dccp_sock *dp = dccp_sk(sk); @@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != - DCCP_MAX_SEQNO + 1)) - dccp_insert_option_ack_vector(sk, skb); + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) + dccp_insert_option_ackvec(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } @@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } } - -struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, - const unsigned int __nocast priority) -{ - struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); - - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap->dccpap_buf, 0xFF, len); -#endif - ap->dccpap_buf_len = len; - ap->dccpap_buf_head = - ap->dccpap_buf_tail = - ap->dccpap_buf_len - 1; - ap->dccpap_buf_ackno = - ap->dccpap_ack_ackno = - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; - ap->dccpap_ack_ptr = 0; - ap->dccpap_time.tv_sec = 0; - ap->dccpap_time.tv_usec = 0; - ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; - } - - return ap; -} - -void dccp_ackpkts_free(struct dccp_ackpkts *ap) -{ - if (ap != NULL) { -#ifdef CONFIG_IP_DCCP_DEBUG - memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); -#endif - kfree(ap); - } -} - -static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; -} - -static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, - const unsigned int index) -{ - return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; -} - -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. - */ -static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, - const unsigned int packets, - const unsigned char state) -{ - unsigned int gap; - signed long new_head; - - if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) - return -ENOBUFS; - - gap = packets - 1; - new_head = ap->dccpap_buf_head - packets; - - if (new_head < 0) { - if (gap > 0) { - memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; - } - new_head += ap->dccpap_buf_len; - } - - ap->dccpap_buf_head = new_head; - - if (gap > 0) - memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, - DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); - - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len += packets; - return 0; -} - -/* - * Implements the draft-ietf-dccp-spec-11.txt Appendix A - */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, - u64 ackno, u8 state) -{ - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in dccpap_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A: - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ - - /* See if this is the first ackno being inserted */ - if (ap->dccpap_buf_vector_len == 0) { - ap->dccpap_buf[ap->dccpap_buf_head] = state; - ap->dccpap_buf_vector_len = 1; - } else if (after48(ackno, ap->dccpap_buf_ackno)) { - const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, - ackno); - - /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. - */ - if (delta == 1 && - dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && - (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < - DCCP_ACKPKTS_LEN_MASK)) - ap->dccpap_buf[ap->dccpap_buf_head]++; - else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S arrives, and - * S <= buf_ackno, the HC-Receiver will scan the table - * for the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); - unsigned int index = ap->dccpap_buf_head; - - while (1) { - const u8 len = dccp_ackpkts_len(ap, index); - const u8 state = dccp_ackpkts_state(ap, index); - /* - * valid packets not yet in dccpap_buf have a reserved - * entry, with a len equal to 0. - */ - if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long) ackno); - ap->dccpap_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == ap->dccpap_buf_len) - index = 0; - } - } - - ap->dccpap_buf_ackno = ackno; - dccp_timestamp(sk, &ap->dccpap_time); -out: - dccp_pr_debug(""); - dccp_ackpkts_print(ap); - return 0; - -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long) ackno); - return -EILSEQ; -} - -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, - int len) -{ - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long) ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - - printk("%d,%d|", state, rl); - ++vector; - } - - printk("\n"); -} - -void dccp_ackpkts_print(const struct dccp_ackpkts *ap) -{ - dccp_ackvector_print(ap->dccpap_buf_ackno, - ap->dccpap_buf + ap->dccpap_buf_head, - ap->dccpap_buf_vector_len); -} -#endif - -static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) -{ - /* - * As we're keeping track of the ack vector size - * (dccpap_buf_vector_len) and the sent ack vector size - * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but - * keep this code here as in the future we'll implement a vector of - * ack records, as suggested in draft-ietf-dccp-spec-11.txt - * Appendix A. -acme - */ -#if 0 - ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; - if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) - ap->dccpap_buf_tail -= ap->dccpap_buf_len; -#endif - ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; -} - -void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, - u64 ackno) -{ - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - - if (ackno == ap->dccpap_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, - (unsigned long long) ap->dccpap_ack_seqno, - (unsigned long long) ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - } -} - -static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, - struct sock *sk, u64 ackno, - const unsigned char len, - const unsigned char *vector) -{ - unsigned char i; - - /* Check if we actually sent an ACK vector */ - if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - /* - * We're in the receiver half connection, so if the received an ACK - * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're - * not interested. - * - * Extra explanation with example: - * - * if we received an ACK vector with ackno 50, it can only be acking - * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). - */ - /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ - if (before48(ackno, ap->dccpap_ack_seqno)) { - /* dccp_pr_debug_cat("yes\n"); */ - return; - } - /* dccp_pr_debug_cat("no\n"); */ - - i = len; - while (i--) { - const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); - u64 ackno_end_rl; - - dccp_set_seqno(&ackno_end_rl, ackno - rl); - - /* - * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, - * ap->dccpap_ack_seqno, ackno); - */ - if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKPKTS_STATE_MASK) >> 6; - /* dccp_pr_debug_cat("yes\n"); */ - - if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - debug_prefix, len, - (unsigned long long) - ap->dccpap_ack_seqno, - (unsigned long long) - ap->dccpap_ack_ackno); - dccp_ackpkts_trow_away_ack_record(ap); - } - /* - * If dccpap_ack_seqno was not received, no problem - * we'll send another ACK vector. - */ - ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; - break; - } - /* dccp_pr_debug_cat("no\n"); */ - - dccp_set_seqno(&ackno, ackno_end_rl - 1); - ++vector; - } -} diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e91e511..4786bdcddcc9 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -16,6 +16,7 @@ #include <net/sock.h> +#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dcb->dccpd_service; + dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { - const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int len = skb->len; @@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); dcb->dccpd_type = DCCP_PKT_DATAACK; - /* - * FIXME: we really should have a - * dccps_ack_pending or use icsk. - */ - } else if (inet_csk_ack_scheduled(sk) || - dp->dccps_timestamp_echo != 0 || - (dp->dccps_options.dccpo_send_ack_vector && - ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && - ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + } else if (dccp_ack_pending(sk)) dcb->dccpd_type = DCCP_PKT_DATAACK; else dcb->dccpd_type = DCCP_PKT_DATA; @@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; + struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); skb->csum = 0; + dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - /* FIXME: set service to something meaningful, coming - * from userspace*/ - DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69c9dc7..a1cfd0e9e3bc 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* + * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) + * before calling listen() + */ + if (dccp_service_not_initialized(sk)) + return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static int dccp_setsockopt_service(struct sock *sk, const u32 service, + char __user *optval, int optlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_service_list *sl = NULL; + + if (service == DCCP_SERVICE_INVALID_VALUE || + optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) + return -EINVAL; + + if (optlen > sizeof(service)) { + sl = kmalloc(optlen, GFP_KERNEL); + if (sl == NULL) + return -ENOMEM; + + sl->dccpsl_nr = optlen / sizeof(u32) - 1; + if (copy_from_user(sl->dccpsl_list, + optval + sizeof(service), + optlen - sizeof(service)) || + dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { + kfree(sl); + return -EFAULT; + } + } + + lock_sock(sk); + dp->dccps_service = service; + + if (dp->dccps_service_list != NULL) + kfree(dp->dccps_service_list); + + dp->dccps_service_list = sl; + release_sock(sk); + return 0; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - lock_sock(sk); + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_setsockopt_service(sk, val, optval, optlen); + lock_sock(sk); dp = dccp_sk(sk); err = 0; @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +static int dccp_getsockopt_service(struct sock *sk, int len, + u32 __user *optval, + int __user *optlen) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_service_list *sl; + int err = -ENOENT, slen = 0, total_len = sizeof(u32); + + lock_sock(sk); + if (dccp_service_not_initialized(sk)) + goto out; + + if ((sl = dp->dccps_service_list) != NULL) { + slen = sl->dccpsl_nr * sizeof(u32); + total_len += slen; + } + + err = -EINVAL; + if (total_len > len) + goto out; + + err = 0; + if (put_user(total_len, optlen) || + put_user(dp->dccps_service, optval) || + (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) + err = -EFAULT; +out: + release_sock(sk); + return err; +} + int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) + if (len < sizeof(int)) return -EINVAL; dp = dccp_sk(sk); @@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: val = dp->dccps_packet_size; + len = sizeof(dp->dccps_packet_size); break; + case DCCP_SOCKOPT_SERVICE: + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + case 128 ... 191: + return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); + case 192 ... 255: + return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, + len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 348f36b529f7..1186dc44cdff 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -452,7 +452,7 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, int gfp) +static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) { struct dn_scp *scp; struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); @@ -804,7 +804,7 @@ static int dn_auto_bind(struct socket *sock) return rv; } -static int dn_confirm_accept(struct sock *sk, long *timeo, int allocation) +static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); DEFINE_WAIT(wait); diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 53633d352868..c96c767b1f74 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -117,7 +117,7 @@ try_again: * The eventual aim is for each socket to have a cached header size * for its outgoing packets, and to set hdr from this when sk != NULL. */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri) +struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri) { struct sk_buff *skb; int hdr = 64; @@ -210,7 +210,8 @@ static void dn_nsp_rtt(struct sock *sk, long rtt) * * Returns: The number of times the packet has been sent previously */ -static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, int gfp) +static inline unsigned dn_nsp_clone_and_send(struct sk_buff *skb, + gfp_t gfp) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sk_buff *skb2; @@ -350,7 +351,8 @@ static unsigned short *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *sk return ptr; } -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, int gfp, int oth) +void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, + gfp_t gfp, int oth) { struct dn_scp *scp = DN_SK(sk); struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -517,7 +519,7 @@ static int dn_nsp_retrans_conn_conf(struct sock *sk) return 0; } -void dn_send_conn_conf(struct sock *sk, int gfp) +void dn_send_conn_conf(struct sock *sk, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; @@ -549,7 +551,8 @@ void dn_send_conn_conf(struct sock *sk, int gfp) static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, int gfp, struct dst_entry *dst, + unsigned short reason, gfp_t gfp, + struct dst_entry *dst, int ddl, unsigned char *dd, __u16 rem, __u16 loc) { struct sk_buff *skb = NULL; @@ -591,7 +594,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, int gfp) + unsigned short reason, gfp_t gfp) { struct dn_scp *scp = DN_SK(sk); int ddl = 0; @@ -612,7 +615,7 @@ void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, { struct dn_skb_cb *cb = DN_SKB_CB(skb); int ddl = 0; - int gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl, NULL, cb->src_port, cb->dst_port); @@ -624,7 +627,7 @@ void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb; unsigned char *ptr; - int gfp = GFP_ATOMIC; + gfp_t gfp = GFP_ATOMIC; if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) return; @@ -659,7 +662,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) unsigned char menuver; struct dn_skb_cb *cb; unsigned char type = 1; - int allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; + gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); if (!skb) diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 4a62093eb343..34fdac51df96 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -406,7 +406,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned long network = 0; rcu_read_lock(); - idev = __in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (idev) { if (idev->ifa_list) network = ntohl(idev->ifa_list->ifa_address) & diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 87a052a9a84f..68a5ca866442 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -146,6 +146,19 @@ int eth_rebuild_header(struct sk_buff *skb) return 0; } +static inline unsigned int compare_eth_addr(const unsigned char *__a, const unsigned char *__b) +{ + const unsigned short *dest = (unsigned short *) __a; + const unsigned short *devaddr = (unsigned short *) __b; + unsigned int res; + + BUILD_BUG_ON(ETH_ALEN != 6); + res = ((dest[0] ^ devaddr[0]) | + (dest[1] ^ devaddr[1]) | + (dest[2] ^ devaddr[2])) != 0; + + return res; +} /* * Determine the packet's protocol ID. The rule here is that we @@ -158,16 +171,15 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw=skb->data; + skb->mac.raw = skb->data; skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - if(*eth->h_dest&1) - { - if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) - skb->pkt_type=PACKET_BROADCAST; + if (*eth->h_dest&1) { + if (!compare_eth_addr(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; else - skb->pkt_type=PACKET_MULTICAST; + skb->pkt_type = PACKET_MULTICAST; } /* @@ -178,10 +190,9 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * seems to set IFF_PROMISC. */ - else if(1 /*dev->flags&IFF_PROMISC*/) - { - if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) - skb->pkt_type=PACKET_OTHERHOST; + else if(1 /*dev->flags&IFF_PROMISC*/) { + if (unlikely(compare_eth_addr(eth->h_dest, dev->dev_addr))) + skb->pkt_type = PACKET_OTHERHOST; } if (ntohs(eth->h_proto) >= 1536) diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 03a47343ddc7..6059e9e37123 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -230,7 +230,7 @@ static int __init ieee80211_init(void) struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = create_proc_entry(DRV_NAME, S_IFDIR, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index c9aaff3fea1e..eed07bbbe6b6 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -207,7 +207,7 @@ void ieee80211_txb_free(struct ieee80211_txb *txb) } static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - int gfp_mask) + gfp_t gfp_mask) { struct ieee80211_txb *txb; int i; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8bf312bdea13..b425748f02d7 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh) neigh->type = inet_addr_type(addr); rcu_read_lock(); - in_dev = rcu_dereference(__in_dev_get(dev)); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) { rcu_read_unlock(); return -EINVAL; @@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip, arp_xmit(skb); } -static void parp_redo(struct sk_buff *skb) -{ - nf_reset(skb); - arp_rcv(skb, skb->dev, NULL, skb->dev); -} - /* * Process an arp request. */ @@ -922,6 +916,11 @@ out: return 0; } +static void parp_redo(struct sk_buff *skb) +{ + arp_process(skb); +} + /* * Receive an arp request from the device layer. @@ -990,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 1; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 1; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; return 0; } return -ENXIO; @@ -1096,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) ipv4_devconf.proxy_arp = 0; return 0; } - if (__in_dev_get(dev)) { - __in_dev_get(dev)->cnf.proxy_arp = 0; + if (__in_dev_get_rtnl(dev)) { + __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ba2895ae8151..74f2207e131a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto out; rc = -ENOBUFS; - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); if (!in_dev) goto out; @@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if (colon) *colon = ':'; - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -748,7 +748,7 @@ rarok: static int inet_gifconf(struct net_device *dev, char __user *buf, int len) { - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -791,7 +791,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; @@ -818,7 +818,7 @@ no_in_dev: read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; for_primary_ifa(in_dev) { @@ -887,7 +887,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop if (dev) { rcu_read_lock(); - if ((in_dev = __in_dev_get(dev))) + if ((in_dev = __in_dev_get_rcu(dev))) addr = confirm_addr_indev(in_dev, dst, local, scope); rcu_read_unlock(); @@ -897,7 +897,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop read_lock(&dev_base_lock); rcu_read_lock(); for (dev = dev_base; dev; dev = dev->next) { - if ((in_dev = __in_dev_get(dev))) { + if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; @@ -957,7 +957,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1078,7 +1078,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (idx > s_idx) s_ip_idx = 0; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); continue; } @@ -1149,7 +1149,7 @@ void inet_forward_change(void) for (dev = dev_base; dev; dev = dev->next) { struct in_device *in_dev; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) in_dev->cnf.forwarding = on; rcu_read_unlock(); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b5a09d1b90b..1b18ce66e7b7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -5,6 +5,7 @@ #include <net/esp.h> #include <asm/scatterlist.h> #include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <net/icmp.h> @@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; - clen = (clen + 2 + blksize-1)&~(blksize-1); + blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) - clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); + clen = ALIGN(clen, esp->conf.padlen); if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) goto error; @@ -143,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc struct ip_esp_hdr *esph; struct esp_data *esp = x->data; struct sk_buff *trailer; - int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -304,16 +305,16 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); if (x->props.mode) { - mtu = (mtu + 2 + blksize-1)&~(blksize-1); + mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ - mtu += 2 + blksize; + mtu = ALIGN(mtu + 2, 4) + blksize - 4; } if (esp->conf.padlen) - mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); + mtu = ALIGN(mtu, esp->conf.padlen); return mtu + x->props.header_len + esp->auth.icv_trunc_len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e1379f71269..e61bc7177eb1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, no_addr = rpf = 0; rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); @@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d41219e8037c..186f20c4a45e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, rta->rta_oif = &dev->ifindex; if (colon) { struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; @@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev) } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) continue; - if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) + if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; spin_lock_bh(&fib_multipath_lock); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1b63b4824164..0093ea08c7f5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.403" +#define VERSION "0.404" #include <linux/config.h> #include <asm/uaccess.h> @@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) Consider a node 'n' and its parent 'tp'. If n is a leaf, every bit in its key is significant. Its presence is - necessitaded by path compression, since during a tree traversal (when + necessitated by path compression, since during a tree traversal (when searching for a leaf - unless we are doing an insertion) we will completely ignore all skipped bits we encounter. Thus we need to verify, at the end of a potentially successful search, that we have indeed been walking the @@ -286,6 +286,8 @@ static inline void check_tnode(const struct tnode *tn) static int halve_threshold = 25; static int inflate_threshold = 50; +static int halve_threshold_root = 15; +static int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) @@ -449,6 +451,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) int i; int err = 0; struct tnode *old_tn; + int inflate_threshold_use; + int halve_threshold_use; if (!tn) return NULL; @@ -541,10 +545,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) check_tnode(tn); + /* Keep root node larger */ + + if(!tn->parent) + inflate_threshold_use = inflate_threshold_root; + else + inflate_threshold_use = inflate_threshold; + err = 0; while ((tn->full_children > 0 && 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= - inflate_threshold * tnode_child_length(tn))) { + inflate_threshold_use * tnode_child_length(tn))) { old_tn = tn; tn = inflate(t, tn); @@ -564,10 +575,18 @@ static struct node *resize(struct trie *t, struct tnode *tn) * node is above threshold. */ + + /* Keep root node larger */ + + if(!tn->parent) + halve_threshold_use = halve_threshold_root; + else + halve_threshold_use = halve_threshold; + err = 0; while (tn->bits > 1 && 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold * tnode_child_length(tn)) { + halve_threshold_use * tnode_child_length(tn)) { old_tn = tn; tn = halve(t, tn); @@ -836,11 +855,12 @@ static void trie_init(struct trie *t) #endif } -/* readside most use rcu_read_lock currently dump routines +/* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) +static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { + struct hlist_head *head = &l->list; struct hlist_node *node; struct leaf_info *li; @@ -853,7 +873,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) static inline struct list_head * get_fa_head(struct leaf *l, int plen) { - struct leaf_info *li = find_leaf_info(&l->list, plen); + struct leaf_info *li = find_leaf_info(l, plen); if (!li) return NULL; @@ -1085,7 +1105,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ @@ -1248,7 +1268,7 @@ err: } -/* should be clalled with rcu_read_lock */ +/* should be called with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res) @@ -1590,7 +1610,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); l = fib_find_node(t, key); - li = find_leaf_info(&l->list, plen); + li = find_leaf_info(l, plen); list_del_rcu(&fa->fa_list); @@ -1714,7 +1734,6 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; - rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1722,7 +1741,6 @@ static int fn_trie_flush(struct fib_table *tb) trie_leaf_remove(t, ll->key); ll = l; } - rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -1833,16 +1851,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi i++; continue; } - if (fa->fa_info->fib_nh == NULL) { - printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } - if (fa->fa_info == NULL) { - printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } + BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -1965,7 +1974,7 @@ struct fib_table * __init fib_hash_init(int id) trie_main = t; if (id == RT_TABLE_LOCAL) - printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; } @@ -2029,7 +2038,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, iter->tnode = (struct tnode *) n; iter->trie = t; iter->index = 0; - iter->depth = 0; + iter->depth = 1; return n; } return NULL; @@ -2274,11 +2283,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "<local>:\n"); else seq_puts(seq, "<main>:\n"); - } else { - seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", - NIPQUAD(prf), tn->pos); - } + } + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", + NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + tn->empty_children); + } else { struct leaf *l = (struct leaf *) n; int i; @@ -2287,7 +2297,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth); seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); for (i = 32; i >= 0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); if (li) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2383,7 +2393,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; for (i=32; i>=0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); struct fib_alias *fa; u32 mask, prefix; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 24eb56ae1b5a..90dca711ac9f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = { /* Control parameters for ECHO replies. */ int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts; +int sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ int sysctl_icmp_ignore_bogus_error_responses; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44607f4767b8..8b6d3939e1e6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) } if (dev) { imr->imr_ifindex = dev->ifindex; - idev = __in_dev_get(dev); + idev = __in_dev_get_rtnl(dev); } return idev; } @@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) } pmc->sources = NULL; pmc->sfmode = MCAST_EXCLUDE; - pmc->sfcount[MCAST_EXCLUDE] = 0; + pmc->sfcount[MCAST_INCLUDE] = 0; pmc->sfcount[MCAST_EXCLUDE] = 1; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fe3c6d3d0c91..94468a76c5b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -494,7 +494,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const unsigned int __nocast priority) + const gfp_t priority) { struct sock *newsk = sk_clone(sk, priority); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4d1502a49852..a010e9a68811 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); write_lock(&ehead->lock); if (hlist_unhashed(&tw->tw_node)) { @@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; + struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -106,11 +106,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_dport = inet->dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; - tw->tw_hashent = sk->sk_hashent; + tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; tw->tw_prot = sk->sk_prot_creator; atomic_set(&tw->tw_refcnt, 1); inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } return tw; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f0d5740d7e22..896ce3f8f53a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev) return -EADDRNOTAVAIL; dev = rt->u.dst.dev; ip_rt_put(rt); - if (__in_dev_get(dev) == NULL) + if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; t->mlink = dev->ifindex; - ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); + ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); } return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9dbf5909f3a6..302b7eb507c9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; in_dev->cnf.rp_filter = 0; @@ -278,7 +278,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { in_dev->cnf.mc_forwarding--; ip_rt_multicast_event(in_dev); } @@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; in_dev->cnf.mc_forwarding++; dev_set_allmulti(dev, +1); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 6e092dadb388..fc6f95aaa969 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = { /* * Replace a segment of data with a new segment */ -int ip_vs_skb_replace(struct sk_buff *skb, int pri, +int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len) { struct iphdr *iph; diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index e11952ea17af..f828fa2eb7de 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (s_addr==cp->caddr && s_port==cp->cport && d_port==cp->vport && d_addr==cp->vaddr && + ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +/* Get reference to connection template */ +struct ip_vs_conn *ip_vs_ct_in_get +(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +{ + unsigned hash; + struct ip_vs_conn *cp; + + hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + + ct_read_lock(hash); + + list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (s_addr==cp->caddr && s_port==cp->cport && + d_port==cp->vport && d_addr==cp->vaddr && + cp->flags & IP_VS_CONN_F_TEMPLATE && + protocol==cp->protocol) { + /* HIT */ + atomic_inc(&cp->refcnt); + goto out; + } + } + cp = NULL; + + out: + ct_read_unlock(hash); + + IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", + ip_vs_proto_name(protocol), + NIPQUAD(s_addr), ntohs(s_port), + NIPQUAD(d_addr), ntohs(d_port), + cp?"hit":"not hit"); + + return cp; +} /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. @@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV state (inactive) or other protocol inacive state */ @@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_read(&dest->refcnt)); /* Update the connection counters */ - if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { /* It is a normal connection, so decrease the inactconns or activeconns counter */ if (cp->flags & IP_VS_CONN_F_INACTIVE) { @@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->cport) { + if (ct->vport != 65535) { if (ip_vs_conn_unhash(ct)) { ct->dport = 65535; ct->vport = 65535; @@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void) ct_write_lock_bh(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) + if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3ac7eeca04ac..981cc3244ef2 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, ports[1]); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, iph->daddr, ports[1], dest->addr, dest->port, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) - ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, + ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0); else - ct = ip_vs_conn_in_get(iph->protocol, snet, 0, + ct = ip_vs_ct_in_get(iph->protocol, snet, 0, iph->daddr, 0); if (!ct || !ip_vs_check_template(ct)) { @@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, htonl(svc->fwmark), 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, dest->addr, 0, - 0, + IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 574d1f509b46..2e5ced3d8062 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; i<m->nr_conns; i++) { + unsigned flags; + s = (struct ip_vs_sync_conn *)p; - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + flags = ntohs(s->flags); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); + else + cp = ip_vs_ct_in_get(s->protocol, + s->caddr, s->cport, + s->vaddr, s->vport); if (!cp) { cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - ntohs(s->flags), NULL); + flags, NULL); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; @@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } else if (!cp->dest) { /* it is an entry created by the synchronization */ cp->state = ntohs(s->state); - cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; + cp->flags = flags | IP_VS_CONN_F_HASHED; } /* Note that we don't touch its state and flags if it is a normal entry. */ - if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { + if (flags & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); p += FULL_CONN_SIZE; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30aa8e2ee214..7d917e4ce1d9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS IF unsure, say `N'. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m + help + This option enables support for a netlink-based userspace interface + + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -129,6 +137,23 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_PPTP + tristate 'PPTP protocol support' + depends on IP_NF_CONNTRACK + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) connection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + For more info, read top of the file + net/ipv4/netfilter/ip_conntrack_pptp.c + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -474,9 +499,14 @@ config IP_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (OBSOLETE)" depends on IP_NF_IPTABLES ---help--- + + This option enables the old IPv4-only "ipt_ULOG" implementation + which has been obsoleted by the new "nfnetlink_log" code (see + CONFIG_NETFILTER_NETLINK_LOG). + This option adds a `ULOG' target, which allows you to create rules in any iptables table. The packet is passed to a userspace logging daemon using netlink multicast sockets; unlike the LOG target @@ -513,6 +543,17 @@ config IP_NF_TARGET_TCPMSS To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_NFQUEUE + tristate "NFQUEUE Target Support" + depends on IP_NF_IPTABLES + help + This Target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + # NAT + specific targets config IP_NF_NAT tristate "Full NAT" @@ -613,6 +654,12 @@ config IP_NF_NAT_AMANDA default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config IP_NF_NAT_PPTP + tristate + depends on IP_NF_NAT!=n && IP_NF_PPTP!=n + default IP_NF_NAT if IP_NF_PPTP=y + default m if IP_NF_PPTP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" @@ -774,11 +821,5 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface' - depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - help - This option enables support for a netlink-based userspace interface - endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1ba0db746817..dab4b58dd31e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,7 +4,11 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o + +ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o +ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o @@ -17,6 +21,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o @@ -24,6 +29,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers +obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o @@ -35,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches @@ -87,6 +93,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o @@ -96,4 +103,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o -obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fa1634256680..a7969286e6e7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -716,8 +716,10 @@ static int translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct arpt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) { ret = -ENOMEM; return ret; diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004bc..fa3f914117ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19cba16e6e1e..07a80b56e8dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; @@ -1112,42 +1112,49 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + event = IPCT_REFRESH; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - ip_conntrack_event_cache(IPCT_REFRESH, skb); + event = IPCT_REFRESH; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) + || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) + event |= IPCT_COUNTER_FILLING; + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (event) + ip_conntrack_event_cache(event, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 000000000000..926a6684643d --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -0,0 +1,806 @@ +/* + * ip_conntrack_pptp.c - Version 3.0 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFFC2673 + * - We can only support one single call within each session + * + * TODO: + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_destroy_siblings() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig <philipc@snapgear.com>) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * 2004-10-22 - Version 2.0 + * - merge Mandrake's 2.6.x port with recent 2.6.x API changes + * - fix lots of linear skb assumptions from Mandrake's port + * 2005-06-10 - Version 2.1 + * - use ip_conntrack_expect_free() instead of kfree() on the + * expect's (which are from the slab for quite some time) + * 2005-06-10 - Version 3.0 + * - port helper to post-2.6.11 API changes, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * 2005-07-30 - Version 3.1 + * - port helper to 2.6.13 API changes + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +static DEFINE_SPINLOCK(ip_pptp_lock); + +int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply); + +void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + if (!ip_nat_pptp_hook_expectfn) { + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&inv_t); + + exp_other = ip_conntrack_expect_find(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + ip_conntrack_unexpect_related(exp_other); + ip_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } else { + /* we need more than simple inversion */ + ip_nat_pptp_hook_expectfn(ct, exp); + } +} + +static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_expect *exp; + + DEBUGP("trying to timeout ct or exp for tuple "); + DUMP_TUPLE(t); + + h = ip_conntrack_find_get(t, NULL); + if (h) { + struct ip_conntrack *sibling = tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + ip_conntrack_put(sibling); + return 1; + } else { + exp = ip_conntrack_expect_find(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + ip_conntrack_unexpect_related(exp); + ip_conntrack_expect_put(exp); + return 1; + } + } + + return 0; +} + + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct ip_conntrack *ct) +{ + struct ip_conntrack_tuple t; + + /* Since ct->sibling_list has literally rusted away in 2.6.11, + * we now need another way to find out about our sibling + * contrack and expects... -HW */ + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + __be16 callid, + __be16 peer_callid) +{ + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; + struct ip_conntrack_expect *exp_orig, *exp_reply; + int ret = 1; + + exp_orig = ip_conntrack_expect_alloc(master); + if (exp_orig == NULL) + goto out; + + exp_reply = ip_conntrack_expect_alloc(master); + if (exp_reply == NULL) + goto out_put_orig; + + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + + exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; + exp_orig->mask.dst.u.gre.key = htons(0xffff); + exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.protonum = 0xff; + + exp_orig->master = master; + exp_orig->expectfn = pptp_expectfn; + exp_orig->flags = 0; + + exp_orig->dir = IP_CT_DIR_ORIGINAL; + + /* both expectations are identical apart from tuple */ + memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + exp_reply->dir = !exp_orig->dir; + + if (ip_nat_pptp_hook_exp_gre) + ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; + } + +out_put_both: + ip_conntrack_expect_put(exp_reply); +out_put_orig: + ip_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + ip_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + ip_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } + + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms disconnect */ + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; + } + + + if (ip_nat_pptp_hook_inbound) + return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + if (ip_nat_pptp_hook_outbound) + return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr _pptph, *pptph; + struct tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + unsigned int nexthdr_off; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + nexthdr_off = (*pskb)->nh.iph->ihl*4; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xff + } + }, + .help = conntrack_pptp_help +}; + +extern void __exit ip_ct_proto_gre_fini(void); +extern int __init ip_ct_proto_gre_init(void); + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + retcode = ip_ct_proto_gre_init(); + if (retcode < 0) + return retcode; + + DEBUGP(" registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + ip_ct_proto_gre_fini(); + return retcode; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + ip_ct_proto_gre_fini(); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); +EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d0..186646eb249f 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb, goto out; rcu_read_lock(); - in_dev = __in_dev_get(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->u.dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 15aef3564742..166e6069f121 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, struct nfattr *nest_count = NFA_NEST(skb, type); u_int64_t tmp; - tmp = cpu_to_be64(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - tmp = cpu_to_be64(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); NFA_NEST_END(skb, nest_count); @@ -833,7 +833,8 @@ out: static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { - unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + unsigned long d; + unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -948,6 +949,31 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) return 0; } +static inline int +ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct ip_conntrack_protocol *proto; + u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + int err = 0; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0) + goto nfattr_failure; + + proto = ip_conntrack_proto_find_get(npt); + if (!proto) + return -EINVAL; + + if (proto->from_nfattr) + err = proto->from_nfattr(tb, ct); + ip_conntrack_proto_put(proto); + + return err; + +nfattr_failure: + return -ENOMEM; +} + static int ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) { @@ -973,6 +999,12 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) return err; } + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + DEBUGP("all done\n"); return 0; } @@ -1002,6 +1034,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (err < 0) goto err; + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + ct->helper = ip_conntrack_helper_find_get(rtuple); add_timer(&ct->timeout); @@ -1270,7 +1308,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1318,7 +1356,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 000000000000..744abb9d377a --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -0,0 +1,328 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/list.h> + +static DEFINE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include <linux/netfilter_ipv4/listhelp.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> + +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) +#else +#define DEBUGP(x, args...) +#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key = 0; + + read_lock_bh(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; + read_unlock_bh(&ip_ct_gre_lock); + + DEBUGP("lookup src key 0x%x up key for ", key); + DUMP_TUPLE_GRE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int +ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap **exist_km, *km, *old; + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to add GRE keymap to non-pptp session\n"); + return -1; + } + + if (!reply) + exist_km = &ct->help.ct_pptp_info.keymap_orig; + else + exist_km = &ct->help.ct_pptp_info.keymap_reply; + + if (*exist_km) { + /* check whether it's a retransmission */ + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; + } + + DEBUGP("trying to override keymap_%s for ct %p\n", + reply? "reply":"orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + + memcpy(&km->tuple, t, sizeof(*t)); + *exist_km = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + write_lock_bh(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + write_unlock_bh(&ip_ct_gre_lock); + + return 0; +} + +/* destroy the keymap entries associated with specified master ct */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) +{ + DEBUGP("entering for ct %p\n", ct); + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); + return; + } + + write_lock_bh(&ip_ct_gre_lock); + if (ct->help.ct_pptp_info.keymap_orig) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_orig); + list_del(&ct->help.ct_pptp_info.keymap_orig->list); + kfree(ct->help.ct_pptp_info.keymap_orig); + ct->help.ct_pptp_info.keymap_orig = NULL; + } + if (ct->help.ct_pptp_info.keymap_reply) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_reply); + list_del(&ct->help.ct_pptp_info.keymap_reply->list); + kfree(ct->help.ct_pptp_info.keymap_reply); + ct->help.ct_pptp_info.keymap_reply = NULL; + } + write_unlock_bh(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + u_int32_t srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "ip_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct ip_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct ip_conntrack *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + const struct sk_buff *skb, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); + } else + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + const struct sk_buff *skb) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { + .proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif +}; + +/* ip_conntrack_proto_gre initialization */ +int __init ip_ct_proto_gre_init(void) +{ + return ip_conntrack_protocol_register(&gre); +} + +void __exit ip_ct_proto_gre_fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + write_lock_bh(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + write_unlock_bh(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 838d1d69b36e..98f0015dd255 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -296,8 +296,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], struct ip_conntrack_tuple *tuple) { if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) + || !tb[CTA_PROTO_ICMP_CODE-1]) return -1; tuple->dst.u.icmp.type = diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index a875f35e576d..59a4a0111dd3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack, && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { DEBUGP("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 1985abc59d24..d6701cafbcc2 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -341,17 +341,43 @@ static int tcp_print_conntrack(struct seq_file *s, static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, const struct ip_conntrack *ct) { + struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + read_lock_bh(&tcp_lock); NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), &ct->proto.tcp.state); read_unlock_bh(&tcp_lock); + NFA_NEST_END(skb, nest_parms); + return 0; nfattr_failure: read_unlock_bh(&tcp_lock); return -1; } + +static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} #endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) @@ -1014,7 +1040,8 @@ static int tcp_packet(struct ip_conntrack *conntrack, /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &conntrack->status); + ip_conntrack_event_cache(IPCT_STATUS, skb); } ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); @@ -1122,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, #endif diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ae3e3e655db5..dd476b191f4b 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,15 +989,15 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1adedb743f60..c5e3abd24672 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -74,12 +74,14 @@ ip_nat_proto_find_get(u_int8_t protonum) return p; } +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); void ip_nat_proto_put(struct ip_nat_protocol *p) { module_put(p->me); } +EXPORT_SYMBOL_GPL(ip_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -111,6 +113,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) return csum_fold(csum_partial((char *)diffs, sizeof(diffs), oldcheck^0xFFFF)); } +EXPORT_SYMBOL(ip_nat_cheat_check); /* Is this tuple already taken? (not by us) */ int @@ -127,6 +130,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, invert_tuplepr(&reply, tuple); return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } +EXPORT_SYMBOL(ip_nat_used_tuple); /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ @@ -347,6 +351,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack, return NF_ACCEPT; } +EXPORT_SYMBOL(ip_nat_setup_info); /* Returns true if succeeded. */ static int @@ -387,10 +392,10 @@ manip_pkt(u_int16_t proto, } /* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +unsigned int ip_nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -417,12 +422,13 @@ unsigned int nat_packet(struct ip_conntrack *ct, } return NF_ACCEPT; } +EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -509,6 +515,7 @@ int icmp_reply_translation(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); /* Protocol registration. */ int ip_nat_protocol_register(struct ip_nat_protocol *proto) @@ -525,6 +532,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto) write_unlock_bh(&ip_nat_lock); return ret; } +EXPORT_SYMBOL(ip_nat_protocol_register); /* Noone stores the protocol anywhere; simply delete it. */ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) @@ -536,6 +544,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) /* Someone could be still looking at the proto in a bh. */ synchronize_net(); } +EXPORT_SYMBOL(ip_nat_protocol_unregister); #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) @@ -578,9 +587,11 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) return ret; } +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif -int __init ip_nat_init(void) +static int __init ip_nat_init(void) { size_t i; @@ -622,10 +633,14 @@ static int clean_nat(struct ip_conntrack *i, void *data) return 0; } -/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ -void ip_nat_cleanup(void) +static void __exit ip_nat_cleanup(void) { ip_ct_iterate_cleanup(&clean_nat, NULL); ip_conntrack_destroyed = NULL; vfree(bysource); } + +MODULE_LICENSE("GPL"); + +module_init(ip_nat_init); +module_exit(ip_nat_cleanup); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index d2dd5d313556..5d506e0564d5 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, } return 1; } +EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX @@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_mangle_udp_packet); /* Adjust one found SACK option including checksum correction */ static void @@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, return 1; } +EXPORT_SYMBOL(ip_nat_seq_adjust); /* Setup NAT on this expected conntrack so it follows master. */ /* If we fail to get a free NAT slot, we'll get dropped on confirm */ @@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct, /* hook doesn't matter, but it has to do destination manip */ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } +EXPORT_SYMBOL(ip_nat_follow_master); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 000000000000..3cdd0684d30d --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -0,0 +1,401 @@ +/* + * ip_nat_pptp.c - Version 3.0 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig <philipc@snapgear.com>) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig <philipc@snapgear.com>) + * 2004-10-22 - Version 2.0 + * - kernel 2.6.x version + * 2005-06-10 - Version 3.0 + * - kernel >= 2.6.11 version, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_nat_pptp.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_NAT_PPTP_VERSION "3.0" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + struct ip_conntrack *master = ct->master; + struct ip_conntrack_expect *other_exp; + struct ip_conntrack_tuple t; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&t); + other_exp = ip_conntrack_expect_find(&t); + if (other_exp) { + ip_conntrack_unexpect_related(other_exp); + ip_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + ip_nat_follow_master(ct, exp); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq->ocreq.callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq->icreq.callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq->clrreq.callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_callid), + (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static int +pptp_exp_gre(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply) +{ + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + + struct ip_conntrack *ct = expect_orig->master; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + /* alter expectation for PNS->PAC direction */ + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } + + /* alter expectation for PAC->PNS direction */ + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + + int ret = NF_ACCEPT, rv; + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq->ocack.peersCallID; + cid = &pptpReq->ocack.callID; + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq->iccon.peersCallID; + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq->wanerr.peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq->disc.callID; + break; + case PPTP_SET_LINK_INFO: + pcid = &pptpReq->setlink.peersCallID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + if (rv != NF_ACCEPT) + return rv; + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_cid), + (char *)&new_cid, + sizeof(new_cid)); + if (rv != NF_ACCEPT) + return rv; + } + + /* check for earlier return value of 'switch' above */ + if (ret != NF_ACCEPT) + return ret; + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +extern int __init ip_nat_proto_gre_init(void); +extern void __exit ip_nat_proto_gre_fini(void); + +static int __init init(void) +{ + int ret; + + DEBUGP("%s: registering NAT helper\n", __FILE__); + + ret = ip_nat_proto_gre_init(); + if (ret < 0) + return ret; + + BUG_ON(ip_nat_pptp_hook_outbound); + ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + + BUG_ON(ip_nat_pptp_hook_inbound); + ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + + BUG_ON(ip_nat_pptp_hook_exp_gre); + ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + + BUG_ON(ip_nat_pptp_hook_expectfn); + ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + + ip_nat_pptp_hook_expectfn = NULL; + ip_nat_pptp_hook_exp_gre = NULL; + ip_nat_pptp_hook_inbound = NULL; + ip_nat_pptp_hook_outbound = NULL; + + ip_nat_proto_gre_fini(); + /* Make sure noone calls it, meanwhile */ + synchronize_net(); + + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 000000000000..7c1285401672 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -0,0 +1,214 @@ +/* + * ip_nat_proto_gre.c - Version 2.0 + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t key; + u_int16_t *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl*4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... */ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + tuple->dst.u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(tuple->dst.u.gre.key)); + pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + break; + } + } + return 1; +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->src.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%u-0x%u ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat helper struct */ +static struct ip_nat_protocol gre = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, + .print = gre_print, + .print_range = gre_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif +}; + +int __init ip_nat_proto_gre_init(void) +{ + return ip_nat_protocol_register(&gre); +} + +void __exit ip_nat_proto_gre_fini(void) +{ + ip_nat_protocol_unregister(&gre); +} diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0ff368b131f6..30cd4e18c129 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, + CTINFO2DIR(ctinfo))) return NF_DROP; else return NF_ACCEPT; @@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum, } IP_NF_ASSERT(info); - return nat_packet(ct, ctinfo, hooknum, pskb); + return ip_nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int @@ -325,15 +325,10 @@ static int init_or_cleanup(int init) printk("ip_nat_init: can't setup rules.\n"); goto cleanup_nothing; } - ret = ip_nat_init(); - if (ret < 0) { - printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_rule_init; - } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { printk("ip_nat_init: can't register in hook.\n"); - goto cleanup_nat; + goto cleanup_rule_init; } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { @@ -374,8 +369,6 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_out_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); - cleanup_nat: - ip_nat_cleanup(); cleanup_rule_init: ip_nat_rule_cleanup(); cleanup_nothing: @@ -395,14 +388,4 @@ static void __exit fini(void) module_init(init); module_exit(fini); -EXPORT_SYMBOL(ip_nat_setup_info); -EXPORT_SYMBOL(ip_nat_protocol_register); -EXPORT_SYMBOL(ip_nat_protocol_unregister); -EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); -EXPORT_SYMBOL_GPL(ip_nat_proto_put); -EXPORT_SYMBOL(ip_nat_cheat_check); -EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); -EXPORT_SYMBOL(ip_nat_mangle_udp_packet); -EXPORT_SYMBOL(ip_nat_used_tuple); -EXPORT_SYMBOL(ip_nat_follow_master); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d54f14d926f6..36339eb39e17 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index eef99a1b5de6..75c27e92f6ab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -27,6 +27,7 @@ #include <asm/semaphore.h> #include <linux/proc_fs.h> #include <linux/err.h> +#include <linux/cpumask.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -921,8 +922,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -943,7 +946,7 @@ replace_table(struct ipt_table *table, struct ipt_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ipt_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d38913754b1..9bcb398fbc1f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/proc_fs.h> #include <linux/jhash.h> +#include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -30,7 +31,7 @@ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <linux/netfilter_ipv4/ip_conntrack.h> -#define CLUSTERIP_VERSION "0.7" +#define CLUSTERIP_VERSION "0.8" #define DEBUG_CLUSTERIP @@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ + atomic_t entries; /* number of entries/rules + * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - u_int16_t num_local_nodes; /* number of local nodes */ - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ + unsigned long local_nodes; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -66,8 +68,7 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list _AND_ the configurable - * data within all structurses (num_local_nodes, local_nodes[]) */ +/* clusterip_lock protects the clusterip_configs list */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir; #endif static inline void -clusterip_config_get(struct clusterip_config *c) { +clusterip_config_get(struct clusterip_config *c) +{ atomic_inc(&c->refcount); } static inline void -clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) { +clusterip_config_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->refcount)) + kfree(c); +} + +/* increase the count of entries(rules) using/referencing this config */ +static inline void +clusterip_config_entry_get(struct clusterip_config *c) +{ + atomic_inc(&c->entries); +} + +/* decrease the count of entries using/referencing this config. If last + * entry(rule) is removed, remove the config from lists, but don't free it + * yet, since proc-files could still be holding references */ +static inline void +clusterip_config_entry_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->entries)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); + dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); - kfree(c); + + /* In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own, + * so it's safe to remove the entry even if it's in use. */ +#ifdef CONFIG_PROC_FS + remove_proc_entry(c->pde->name, c->pde->parent); +#endif } } - static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { @@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip) +clusterip_config_find_get(u_int32_t clusterip, int entry) { struct clusterip_config *c; @@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip) return NULL; } atomic_inc(&c->refcount); + if (entry) + atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c; } +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) { + set_bit(i->local_nodes[n] - 1, &c->local_nodes); + } +} + static struct clusterip_config * clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev) @@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - c->num_local_nodes = i->num_local_nodes; - memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); + clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); + atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS /* create proc dir entry */ @@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - if (c->num_local_nodes >= CLUSTERIP_MAX_NODES - || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - - /* check if we alrady have this number in our array */ - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - write_unlock_bh(&clusterip_lock); - return 1; - } - } - c->local_nodes[c->num_local_nodes++] = nodenum; + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; - write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - - if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); - memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); - c->num_local_nodes--; - write_unlock_bh(&clusterip_lock); - return 0; - } - } + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return 0; - write_unlock_bh(&clusterip_lock); return 1; } @@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - int i; - - read_lock_bh(&clusterip_lock); - - if (config->num_local_nodes == 0) { - read_unlock_bh(&clusterip_lock); - return 0; - } - - for (i = 0; i < config->num_local_nodes; i++) { - if (config->local_nodes[i] == hash) { - read_unlock_bh(&clusterip_lock); - return 1; - } - } - - read_unlock_bh(&clusterip_lock); - - return 0; + return test_bit(hash - 1, &config->local_nodes); } /*********************************************************************** @@ -415,8 +411,26 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr); - if (!config) { + config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + if (config) { + if (cipinfo->config != NULL) { + /* Case A: This is an entry that gets reloaded, since + * it still has a cipinfo->config pointer. Simply + * increase the entry refcount and return */ + if (cipinfo->config != config) { + printk(KERN_ERR "CLUSTERIP: Reloaded entry " + "has invalid config pointer!\n"); + return 0; + } + clusterip_config_entry_get(cipinfo->config); + } else { + /* Case B: This is a new rule referring to an existing + * clusterip config. */ + cipinfo->config = config; + clusterip_config_entry_get(cipinfo->config); + } + } else { + /* Case C: This is a completely new clusterip config */ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return 0; @@ -443,10 +457,9 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } + cipinfo->config = config; } - cipinfo->config = config; - return 1; } @@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) { struct ipt_clusterip_tgt_info *cipinfo = matchinfo; - /* we first remove the proc entry and then drop the reference - * count. In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own */ -#ifdef CONFIG_PROC_FS - remove_proc_entry(cipinfo->config->pde->name, - cipinfo->config->pde->parent); -#endif + /* if no more entries are referencing the config, remove it + * from the list and destroy the proc entry */ + clusterip_config_entry_put(cipinfo->config); + clusterip_config_put(cipinfo->config); } @@ -533,7 +543,7 @@ arp_mangle(unsigned int hook, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip); + c = clusterip_config_find_get(payload->src_ip, 0); if (!c) return NF_ACCEPT; @@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int *nodeidx; - - read_lock_bh(&clusterip_lock); - if (*pos >= c->num_local_nodes) + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) return NULL; - nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); - if (!nodeidx) + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) return ERR_PTR(-ENOMEM); - *nodeidx = *pos; - return nodeidx; + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - *pos = ++(*nodeidx); - if (*pos >= c->num_local_nodes) { + *pos = ++idx->pos; + if (*pos >= idx->weight) { kfree(v); return NULL; } - return nodeidx; + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); - - read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - if (*nodeidx != 0) + if (idx->pos != 0) seq_putc(s, ','); - seq_printf(s, "%u", c->local_nodes[*nodeidx]); - if (*nodeidx == c->num_local_nodes-1) + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) seq_putc(s, '\n'); return 0; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 715cb613405c..5245bfd33d52 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get((*pskb)->dev); + indev = __in_dev_get_rcu((*pskb)->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index e2c14f3cb2fc..2883ccd8a91d 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum, /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + pm->timestamp_sec = skb->tstamp.off_sec; + pm->timestamp_usec = skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 304bb0a1d4f0..4b0d7e4d6269 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8549f26e2495..381dd6a6aebb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, struct in_device *in_dev; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) != NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { int our = ip_check_mc(in_dev, daddr, saddr, skb->nh.iph->protocol); if (our @@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) err = -ENODEV; if (dev_out == NULL) goto out; - if (__in_dev_get(dev_out) == NULL) { + + /* RACE: Check return value of inet_select_addr instead. */ + if (__in_dev_get_rtnl(dev_out) == NULL) { dev_put(dev_out); goto out; /* Wrong error code */ } diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index b940346de4e7..6d80e063c187 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -136,7 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1)) /* slow start */ ca->cnt = (cwnd * (BICTCP_B-1)) - / cwnd-ca->last_max_cwnd; + / (cwnd - ca->last_max_cwnd); else /* linear increase */ ca->cnt = cwnd / max_increment; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 29222b964951..677419d0c9ad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -355,8 +355,6 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); - if (!ofo_win) - tp->window_clamp = min(tp->window_clamp, app_win); tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); } } @@ -979,14 +977,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + pcount = tcp_skb_pcount(skb); - if (pcount > 1 && - (after(start_seq, TCP_SKB_CB(skb)->seq) || - before(end_seq, TCP_SKB_CB(skb)->end_seq))) { + if (pcount > 1 && !in_sack && + after(TCP_SKB_CB(skb)->end_seq, start_seq)) { unsigned int pkt_len; - if (after(start_seq, TCP_SKB_CB(skb)->seq)) + in_sack = !after(start_seq, + TCP_SKB_CB(skb)->seq); + + if (!in_sack) pkt_len = (start_seq - TCP_SKB_CB(skb)->seq); else @@ -999,9 +1002,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ fack_count += pcount; - in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && - !before(end_seq, TCP_SKB_CB(skb)->end_seq); - sacked = TCP_SKB_CB(skb)->sacked; /* Account D-SACK for retransmitted packet. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13dfb391cdf1..c85819d8474b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -130,19 +130,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +180,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -188,7 +189,7 @@ unique: * in hash table socket with a funny identity. */ inet->num = lport; inet->sport = htons(lport); - sk->sk_hashent = hash; + sk->sk_hash = hash; BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a88db28b0af7..b1a63b2c6b4a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c10e4435e3b1..7114031fdc70 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -190,7 +190,7 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set initial window to value enough for senders, - * following RFC1414. Senders, not following this RFC, + * following RFC2414. Senders, not following this RFC, * will be satisfied with 2. */ if (mss > (1<<*rcv_wscale)) { @@ -435,6 +435,17 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss int nsize, old_factor; u16 flags; + if (unlikely(len >= skb->len)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "TCP: seg_size=%u, mss=%u, seq=%u, " + "end_seq=%u, skb->len=%u.\n", len, mss_now, + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + skb->len); + WARN_ON(1); + } + return 0; + } + nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -459,9 +470,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = - (TCP_SKB_CB(skb)->sacked & - (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { @@ -499,11 +508,26 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tcp_skb_pcount(buff); tp->packets_out -= diff; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= diff; tp->left_out -= diff; } + if (diff > 0) { + /* Adjust Reno SACK estimate. */ + if (!tp->rx_opt.sack_ok) { + tp->sacked_out -= diff; + if ((int)tp->sacked_out < 0) + tp->sacked_out = 0; + tcp_sync_left_out(tp); + } + tp->fackets_out -= diff; if ((int)tp->fackets_out < 0) tp->fackets_out = 0; @@ -1595,7 +1619,7 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM */ -void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fea3f4402a0..a970b4727ce8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1806,7 +1806,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } for (dev = dev_base; dev != NULL; dev = dev->next) { - struct in_device * in_dev = __in_dev_get(dev); + struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -3520,6 +3520,8 @@ int __init addrconf_init(void) if (err) return err; + ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + register_netdevice_notifier(&ipv6_dev_notf); #ifdef CONFIG_IPV6_PRIVACY diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9b27460f0cc7..40d9a1935ab5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -31,6 +31,7 @@ #include <net/esp.h> #include <asm/scatterlist.h> #include <linux/crypto.h> +#include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <net/icmp.h> @@ -66,10 +67,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; - clen = (clen + 2 + blksize-1)&~(blksize-1); + blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) - clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); + clen = ALIGN(clen, esp->conf.padlen); if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { goto error; @@ -133,7 +134,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru struct ipv6_esp_hdr *esph; struct esp_data *esp = x->data; struct sk_buff *trailer; - int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; @@ -235,16 +236,17 @@ out_nofree: static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); + u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); if (x->props.mode) { - mtu = (mtu + 2 + blksize-1)&~(blksize-1); + mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ - mtu += 2 + blksize; + u32 padsize = ((blksize - 1) & 7) + 1; + mtu = ALIGN(mtu + 2, padsize) + blksize - padsize; } if (esp->conf.padlen) - mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); + mtu = ALIGN(mtu, esp->conf.padlen); return mtu + x->props.header_len + esp->auth.icv_full_len; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2f589f24c093..563b442ffab8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -666,7 +666,7 @@ slow_path: */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (frag_id) { + if (!frag_id) { ipv6_select_ident(skb, fh); frag_id = fh->identification; } else diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 29fed6e58d0a..39a96c768102 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1393,7 +1393,7 @@ static void mld_sendpack(struct sk_buff *skb) static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) { - return sizeof(struct mld2_grec) + 4*mld_scount(pmc,type,gdel,sdel); + return sizeof(struct mld2_grec) + 16 * mld_scount(pmc,type,gdel,sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, @@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) } pmc->mca_sources = NULL; pmc->mca_sfmode = MCAST_EXCLUDE; - pmc->mca_sfcount[MCAST_EXCLUDE] = 0; + pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 555a31347eda..305d9ee6d7db 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1450,7 +1450,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, static void pndisc_redo(struct sk_buff *skb) { - ndisc_rcv(skb); + ndisc_recv_ns(skb); kfree_skb(skb); } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 216fbe1ac65c..bb7ccfe33f23 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,6 +209,17 @@ config IP6_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NFQUEUE + tristate "NFQUEUE Target Support" + depends on IP_NF_IPTABLES + help + This Target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index bd9a16a5cbba..2b2c370e8b1c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -21,9 +21,9 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o +obj-$(CONFIG_IP6_NF_TARGET_NFQUEUE) += ip6t_NFQUEUE.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o -obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index aa11cf366efa..5027bbe6415e 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; + pmsg->timestamp_sec = entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787f..21deec25a12b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -28,6 +28,7 @@ #include <asm/uaccess.h> #include <asm/semaphore.h> #include <linux/proc_fs.h> +#include <linux/cpumask.h> #include <linux/netfilter_ipv6/ip6_tables.h> @@ -950,8 +951,10 @@ translate_table(const char *name, } /* And one copy for every other CPU */ - for (i = 1; i < num_possible_cpus(); i++) { - memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, + for_each_cpu(i) { + if (i == 0) + continue; + memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, newinfo->entries, SMP_ALIGN(newinfo->size)); } @@ -972,7 +975,7 @@ replace_table(struct ip6t_table *table, struct ip6t_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { + for_each_cpu(i) { table_base = (void *)newinfo->entries + TABLE_OFFSET(newinfo, i); @@ -1019,7 +1022,7 @@ get_counters(const struct ip6t_table_info *t, unsigned int cpu; unsigned int i; - for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + for_each_cpu(cpu) { i = 0; IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), t->size, @@ -1153,7 +1156,8 @@ do_replace(void __user *user, unsigned int len) return -ENOMEM; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(tmp.size) * num_possible_cpus()); + + SMP_ALIGN(tmp.size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1467,7 +1471,8 @@ int ip6t_register_table(struct ip6t_table *table, = { 0, 0, 0, { 0 }, { 0 }, { } }; newinfo = vmalloc(sizeof(struct ip6t_table_info) - + SMP_ALIGN(repl->size) * num_possible_cpus()); + + SMP_ALIGN(repl->size) * + (highest_possible_processor_id()+1)); if (!newinfo) return -ENOMEM; @@ -1955,6 +1960,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2019,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bba..dde37793d20b 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,92 +48,21 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_auth_hdr *ah = NULL, _ah; + struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; - /*DEBUGP("IPv6 AH entered\n");*/ - /* if (opt->auth == 0) return 0; - * It does not filled on output */ - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_ah header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* AH -> evaluate */ - if (nexthdr == NEXTHDR_AUTH) { - temp |= MASK_AH; - break; - } - - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_ah: new pointer too large! \n"); - break; - } - } - - /* AH header not found */ - if (temp != MASK_AH) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) return 0; - if (len < sizeof(struct ip_auth_hdr)){ + ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); + if (ah == NULL) { *hotdrop = 1; return 0; } - ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); - BUG_ON(ah == NULL); + hdrlen = (ah->hdrlen + 2) << 2; DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); DEBUGP("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a8..c450a635e54b 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8e..24bc0cde43a1 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -48,87 +48,22 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_esp_hdr _esp, *eh = NULL; + struct ip_esp_hdr _esp, *eh; const struct ip6t_esp *espinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; - - DEBUGP("ipv6_esp header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - temp |= MASK_ESP; - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_esp: new pointer too large! \n"); - break; - } - } - - /* ESP header not found */ - if (temp != MASK_ESP) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) return 0; - if (len < sizeof(struct ip_esp_hdr)) { + eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); + if (eh == NULL) { *hotdrop = 1; return 0; } - eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); - BUG_ON(eh == NULL); - DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); return (eh != NULL) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc80..085d5f8eea29 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,90 +48,18 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh = NULL; + struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; - unsigned int hdrlen = 0; - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_frag header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* FRAG -> evaluate */ - if (nexthdr == NEXTHDR_FRAGMENT) { - temp |= MASK_FRAGMENT; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_frag: new pointer too large! \n"); - break; - } - } - - /* FRAG header not found */ - if ( temp != MASK_FRAGMENT ) return 0; - - if (len < sizeof(struct frag_hdr)){ - *hotdrop = 1; - return 0; - } - fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - BUG_ON(fh == NULL); + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + return 0; + + fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); + if (fh == NULL){ + *hotdrop = 1; + return 0; + } DEBUGP("INFO %04X ", fh->frag_off); DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127e..1d09485111d0 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb670037df3..beb2fd5cebbb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,98 +50,29 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh = NULL; + struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; struct in6_addr *ap, _addr; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + return 0; - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_rt header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* ROUTING -> evaluate */ - if (nexthdr == NEXTHDR_ROUTING) { - temp |= MASK_ROUTING; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_rt: new pointer is too large! \n"); - break; - } - } - - /* ROUTING header not found */ - if ( temp != MASK_ROUTING ) return 0; - - if (len < (int)sizeof(struct ipv6_rt_hdr)){ + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen){ /* Pcket smaller than its length field */ return 0; } - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - BUG_ON(rh == NULL); - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); DEBUGP("TYPE %04X ", rh->type); DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5aa3691c578d..a1265a320b11 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80643e6b346b..d693cb988b78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -209,9 +209,11 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); - list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; - lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (tcp_hashinfo.ehash_size - 1); + list = &tcp_hashinfo.ehash[hash].chain; + lock = &tcp_hashinfo.ehash[hash].lock; write_lock(lock); } @@ -322,13 +324,13 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, - tcp_hashinfo.ehash_size); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + prefetch(head->chain.first); write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ @@ -365,14 +367,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) goto not_unique; } unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); - sk->sk_hashent = hash; + sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); write_unlock(&head->lock); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 69b146843a20..bf9519341fd3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -99,7 +99,7 @@ static int udp_v6_get_port(struct sock *sk, unsigned short snum) next:; } result = best; - for(;; result += UDP_HTABLE_SIZE) { + for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -107,6 +107,8 @@ static int udp_v6_get_port(struct sock *sk, unsigned short snum) if (!udp_lport_inuse(result)) break; } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) + goto fail; gotit: udp_port_rover = snum = result; } else { @@ -405,9 +407,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, continue; if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) - return s; - continue; + if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + continue; } if(!inet6_mc_check(s, loc_addr, rmt_addr)) continue; @@ -640,6 +641,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; + int connected = 0; /* destination address check */ if (sin6) { @@ -749,6 +751,7 @@ do_udp_sendmsg: fl->fl_ip_dport = inet->dport; daddr = &np->daddr; fl->fl6_flowlabel = np->flow_label; + connected = 1; } if (!fl->oif) @@ -771,6 +774,7 @@ do_udp_sendmsg: } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; + connected = 0; } if (opt == NULL) opt = np->opt; @@ -788,10 +792,13 @@ do_udp_sendmsg: ipv6_addr_copy(&final, &fl->fl6_dst); ipv6_addr_copy(&fl->fl6_dst, rt0->addr); final_p = &final; + connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) + if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { fl->oif = np->mcast_oif; + connected = 0; + } err = ip6_dst_lookup(sk, &dst, fl); if (err) @@ -847,10 +854,16 @@ do_append_data: else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - if (dst) - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + if (dst) { + if (connected) { + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? + &np->daddr : NULL); + } else { + dst_release(dst); + } + } + if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 071cd2cefd8a..953e255d2bc8 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -310,7 +310,7 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) #ifdef CONFIG_INET IRDA_DEBUG(4, "IrLAN: Sending gratuitous ARP\n"); rcu_read_lock(); - in_dev = __in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (in_dev == NULL) goto out; if (in_dev->ifa_list) diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 6602d901f8b1..8aff254cb418 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -38,7 +38,7 @@ #include <net/irda/parameters.h> #include <net/irda/irttp.h> -static struct irttp_cb *irttp = NULL; +static struct irttp_cb *irttp; static void __irttp_close_tsap(struct tsap_cb *self); @@ -86,12 +86,9 @@ static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; */ int __init irttp_init(void) { - /* Initialize the irttp structure. */ - if (irttp == NULL) { - irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL); - if (irttp == NULL) - return -ENOMEM; - } + irttp = kmalloc(sizeof(struct irttp_cb), GFP_KERNEL); + if (irttp == NULL) + return -ENOMEM; memset(irttp, 0, sizeof(struct irttp_cb)); irttp->magic = TTP_MAGIC; @@ -100,6 +97,7 @@ int __init irttp_init(void) if (!irttp->tsaps) { IRDA_ERROR("%s: can't allocate IrTTP hashbin!\n", __FUNCTION__); + kfree(irttp); return -ENOMEM; } @@ -115,7 +113,6 @@ int __init irttp_init(void) void __exit irttp_cleanup(void) { /* Check for main structure */ - IRDA_ASSERT(irttp != NULL, return;); IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;); /* @@ -382,7 +379,6 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) struct lsap_cb *lsap; notify_t ttp_notify; - IRDA_ASSERT(irttp != NULL, return NULL;); IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;); /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to @@ -1880,8 +1876,6 @@ static int irttp_seq_open(struct inode *inode, struct file *file) struct seq_file *seq; int rc = -ENOMEM; struct irttp_iter_state *s; - - IRDA_ASSERT(irttp != NULL, return -EINVAL;); s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) diff --git a/net/key/af_key.c b/net/key/af_key.c index 4879743b945a..39031684b65c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -185,7 +185,7 @@ static int pfkey_release(struct socket *sock) } static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - int allocation, struct sock *sk) + gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; @@ -217,7 +217,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, int allocation, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk) { struct sock *sk; @@ -1416,7 +1416,8 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, return 0; } -static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, int allocation) +static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, + gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -2153,6 +2154,7 @@ out: static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { + unsigned int dir; int err; struct sadb_x_policy *pol; struct xfrm_policy *xp; @@ -2161,7 +2163,11 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; - xp = xfrm_policy_byid(0, pol->sadb_x_policy_id, + dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); + if (dir >= XFRM_POLICY_MAX) + return -EINVAL; + + xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2173,9 +2179,9 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) { c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; - km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); + km_policy_notify(xp, dir, &c); } else { - err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1); + err = key_pol_get_resp(sk, xp, hdr, dir); } xfrm_pol_put(xp); diff --git a/net/llc/Makefile b/net/llc/Makefile index 5ebd4ed2bd42..4e260cff3c5d 100644 --- a/net/llc/Makefile +++ b/net/llc/Makefile @@ -22,3 +22,4 @@ llc2-y := llc_if.o llc_c_ev.o llc_c_ac.o llc_conn.o llc_c_st.o llc_pdu.o \ llc_sap.o llc_s_ac.o llc_s_ev.o llc_s_st.o af_llc.o llc_station.o llc2-$(CONFIG_PROC_FS) += llc_proc.o +llc2-$(CONFIG_SYSCTL) += sysctl_net_llc.o diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 66f55e514b56..59d02cbbeb9e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -21,6 +21,7 @@ * See the GNU General Public License for more details. */ #include <linux/config.h> +#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> @@ -37,10 +38,9 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static struct proto_ops llc_ui_ops; -static int llc_ui_wait_for_conn(struct sock *sk, int timeout); -static int llc_ui_wait_for_disc(struct sock *sk, int timeout); -static int llc_ui_wait_for_data(struct sock *sk, int timeout); -static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout); +static int llc_ui_wait_for_conn(struct sock *sk, long timeout); +static int llc_ui_wait_for_disc(struct sock *sk, long timeout); +static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #if 0 #define dprintk(args...) printk(KERN_DEBUG args) @@ -116,12 +116,12 @@ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) struct llc_sock* llc = llc_sk(sk); int rc = 0; - if (llc_data_accept_state(llc->state) || llc->p_flag) { - int timeout = sock_sndtimeo(sk, noblock); + if (unlikely(llc_data_accept_state(llc->state) || llc->p_flag)) { + long timeout = sock_sndtimeo(sk, noblock); rc = llc_ui_wait_for_busy_core(sk, timeout); } - if (!rc) + if (unlikely(!rc)) rc = llc_build_and_send_pkt(sk, skb); return rc; } @@ -155,7 +155,7 @@ static int llc_ui_create(struct socket *sock, int protocol) struct sock *sk; int rc = -ESOCKTNOSUPPORT; - if (sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM) { + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { @@ -177,7 +177,7 @@ static int llc_ui_release(struct socket *sock) struct sock *sk = sock->sk; struct llc_sock *llc; - if (!sk) + if (unlikely(sk == NULL)) goto out; sock_hold(sk); lock_sock(sk); @@ -189,10 +189,6 @@ static int llc_ui_release(struct socket *sock) if (!sock_flag(sk, SOCK_ZAPPED)) llc_sap_remove_socket(llc->sap, sk); release_sock(sk); - if (llc->sap && hlist_empty(&llc->sap->sk_list.list)) { - llc_release_sockets(llc->sap); - llc_sap_close(llc->sap); - } if (llc->dev) dev_put(llc->dev); sock_put(sk); @@ -221,6 +217,7 @@ static int llc_ui_autoport(void) llc_ui_sap_last_autoport = i + 2; goto out; } + llc_sap_put(sap); } llc_ui_sap_last_autoport = LLC_SAP_DYN_START; tries++; @@ -231,20 +228,13 @@ out: } /** - * llc_ui_autobind - Bind a socket to a specific address. - * @sk: Socket to bind an address to. - * @addr: Address the user wants the socket bound to. + * llc_ui_autobind - automatically bind a socket to a sap + * @sock: socket to bind + * @addr: address to connect to + * + * Used by llc_ui_connect and llc_ui_sendmsg when the user hasn't + * specifically used llc_ui_bind to bind to an specific address/sap * - * Bind a socket to a specific address. For llc a user is able to bind to - * a specific sap only or mac + sap. If the user only specifies a sap and - * a null dmac (all zeros) the user is attempting to bind to an entire - * sap. This will stop anyone else on the local system from using that - * sap. If someone else has a mac + sap open the bind to null + sap will - * fail. - * If the user desires to bind to a specific mac + sap, it is possible to - * have multiple sap connections via multiple macs. - * Bind and autobind for that matter must enforce the correct sap usage - * otherwise all hell will break loose. * Returns: 0 upon success, negative otherwise. */ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) @@ -285,11 +275,7 @@ out: * @addrlen: Length of the uaddr structure. * * Bind a socket to a specific address. For llc a user is able to bind to - * a specific sap only or mac + sap. If the user only specifies a sap and - * a null dmac (all zeros) the user is attempting to bind to an entire - * sap. This will stop anyone else on the local system from using that - * sap. If someone else has a mac + sap open the bind to null + sap will - * fail. + * a specific sap only or mac + sap. * If the user desires to bind to a specific mac + sap, it is possible to * have multiple sap connections via multiple macs. * Bind and autobind for that matter must enforce the correct sap usage @@ -305,10 +291,16 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __FUNCTION__, addr->sllc_sap); - if (!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)) + if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (addr->sllc_family != AF_LLC) + if (unlikely(addr->sllc_family != AF_LLC)) + goto out; + rc = -ENODEV; + rtnl_lock(); + llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + rtnl_unlock(); + if (!llc->dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; @@ -322,6 +314,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -EBUSY; /* some other network layer is using the sap */ if (!sap) goto out; + llc_sap_hold(sap); } else { struct llc_addr laddr, daddr; struct sock *ask; @@ -338,7 +331,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) ask = llc_lookup_established(sap, &daddr, &laddr); if (ask) { sock_put(ask); - goto out; + goto out_put; } } llc->laddr.lsap = addr->sllc_sap; @@ -348,6 +341,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) llc_sap_add_socket(sap, sk); sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; +out_put: + llc_sap_put(sap); out: return rc; } @@ -369,7 +364,7 @@ static int llc_ui_shutdown(struct socket *sock, int how) int rc = -ENOTCONN; lock_sock(sk); - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) goto out; rc = -EINVAL; if (how != 2) @@ -404,14 +399,18 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; - struct net_device *dev; int rc = -EINVAL; lock_sock(sk); - if (addrlen != sizeof(*addr)) + if (unlikely(addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (addr->sllc_family != AF_LLC) + if (unlikely(addr->sllc_family != AF_LLC)) + goto out; + if (unlikely(sk->sk_type != SOCK_STREAM)) + goto out; + rc = -EALREADY; + if (unlikely(sock->state == SS_CONNECTING)) goto out; /* bind connection to sap if user hasn't done it. */ if (sock_flag(sk, SOCK_ZAPPED)) { @@ -419,19 +418,13 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, rc = llc_ui_autobind(sock, addr); if (rc) goto out; - llc->daddr.lsap = addr->sllc_sap; - memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); } - dev = llc->dev; - if (sk->sk_type != SOCK_STREAM) - goto out; - rc = -EALREADY; - if (sock->state == SS_CONNECTING) - goto out; + llc->daddr.lsap = addr->sllc_sap; + memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN); sock->state = SS_CONNECTING; sk->sk_state = TCP_SYN_SENT; llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap); - rc = llc_establish_connection(sk, dev->dev_addr, + rc = llc_establish_connection(sk, llc->dev->dev_addr, addr->sllc_mac, addr->sllc_sap); if (rc) { dprintk("%s: llc_ui_send_conn failed :-(\n", __FUNCTION__); @@ -439,12 +432,30 @@ static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_CLOSE; goto out; } - rc = llc_ui_wait_for_conn(sk, sk->sk_rcvtimeo); - if (rc) - dprintk("%s: llc_ui_wait_for_conn failed=%d\n", __FUNCTION__, rc); + + if (sk->sk_state == TCP_SYN_SENT) { + const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + + if (!timeo || !llc_ui_wait_for_conn(sk, timeo)) + goto out; + + rc = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out; + } + + if (sk->sk_state == TCP_CLOSE) + goto sock_error; + + sock->state = SS_CONNECTED; + rc = 0; out: release_sock(sk); return rc; +sock_error: + rc = sock_error(sk) ? : -ECONNABORTED; + sock->state = SS_UNCONNECTED; + goto out; } /** @@ -461,10 +472,10 @@ static int llc_ui_listen(struct socket *sock, int backlog) int rc = -EINVAL; lock_sock(sk); - if (sock->state != SS_UNCONNECTED) + if (unlikely(sock->state != SS_UNCONNECTED)) goto out; rc = -EOPNOTSUPP; - if (sk->sk_type != SOCK_STREAM) + if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EAGAIN; if (sock_flag(sk, SOCK_ZAPPED)) @@ -483,20 +494,14 @@ out: return rc; } -static int llc_ui_wait_for_disc(struct sock *sk, int timeout) +static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { - DECLARE_WAITQUEUE(wait, current); - int rc; + DEFINE_WAIT(wait); + int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - rc = 0; - if (sk->sk_state != TCP_CLOSE) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -504,65 +509,40 @@ static int llc_ui_wait_for_disc(struct sock *sk, int timeout) rc = -EAGAIN; if (!timeout) break; + rc = 0; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + finish_wait(sk->sk_sleep, &wait); return rc; } -static int llc_ui_wait_for_conn(struct sock *sk, int timeout) +static int llc_ui_wait_for_conn(struct sock *sk, long timeout) { - DECLARE_WAITQUEUE(wait, current); - int rc; + DEFINE_WAIT(wait); - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - rc = -EAGAIN; - if (sk->sk_state == TCP_CLOSE) - break; - rc = 0; - if (sk->sk_state != TCP_ESTABLISHED) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) break; - rc = -ERESTARTSYS; - if (signal_pending(current)) - break; - rc = -EAGAIN; - if (!timeout) + if (signal_pending(current) || !timeout) break; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return rc; + finish_wait(sk->sk_sleep, &wait); + return timeout; } -static int llc_ui_wait_for_data(struct sock *sk, int timeout) +static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { - DECLARE_WAITQUEUE(wait, current); - int rc = 0; + DEFINE_WAIT(wait); + struct llc_sock *llc = llc_sk(sk); + int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - __set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; - /* - * Well, if we have backlog, try to process it now. - */ - if (sk->sk_backlog.tail) { - release_sock(sk); - lock_sock(sk); - } + while (1) { + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); rc = 0; - if (skb_queue_empty(&sk->sk_receive_queue)) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + if (sk_wait_event(sk, &timeout, + (sk->sk_shutdown & RCV_SHUTDOWN) || + (!llc_data_accept_state(llc->state) && + !llc->p_flag))) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -571,40 +551,35 @@ static int llc_ui_wait_for_data(struct sock *sk, int timeout) if (!timeout) break; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + finish_wait(sk->sk_sleep, &wait); return rc; } -static int llc_ui_wait_for_busy_core(struct sock *sk, int timeout) +static int llc_wait_data(struct sock *sk, long timeo) { - DECLARE_WAITQUEUE(wait, current); - struct llc_sock *llc = llc_sk(sk); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); - for (;;) { - dprintk("%s: looping...\n", __FUNCTION__); - __set_current_state(TASK_INTERRUPTIBLE); - rc = -ENOTCONN; - if (sk->sk_shutdown & RCV_SHUTDOWN) + while (1) { + /* + * POSIX 1003.1g mandates this order. + */ + if (sk->sk_err) { + rc = sock_error(sk); break; + } rc = 0; - if (llc_data_accept_state(llc->state) || llc->p_flag) { - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - } else + if (sk->sk_shutdown & RCV_SHUTDOWN) break; - rc = -ERESTARTSYS; + rc = -EAGAIN; + if (!timeo) + break; + rc = sock_intr_errno(timeo); if (signal_pending(current)) break; - rc = -EAGAIN; - if (!timeout) + rc = 0; + if (sk_wait_data(sk, &timeo)) break; } - __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); return rc; } @@ -627,15 +602,18 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) dprintk("%s: accepting on %02X\n", __FUNCTION__, llc_sk(sk)->laddr.lsap); lock_sock(sk); - if (sk->sk_type != SOCK_STREAM) + if (unlikely(sk->sk_type != SOCK_STREAM)) goto out; rc = -EINVAL; - if (sock->state != SS_UNCONNECTED || sk->sk_state != TCP_LISTEN) + if (unlikely(sock->state != SS_UNCONNECTED || + sk->sk_state != TCP_LISTEN)) goto out; /* wait for a connection to arrive. */ - rc = llc_ui_wait_for_data(sk, sk->sk_rcvtimeo); - if (rc) - goto out; + if (skb_queue_empty(&sk->sk_receive_queue)) { + rc = llc_wait_data(sk, sk->sk_rcvtimeo); + if (rc) + goto out; + } dprintk("%s: got a new connection on %02X\n", __FUNCTION__, llc_sk(sk)->laddr.lsap); skb = skb_dequeue(&sk->sk_receive_queue); @@ -657,7 +635,6 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) /* put original socket back into a clean listen state. */ sk->sk_state = TCP_LISTEN; sk->sk_ack_backlog--; - skb->sk = NULL; dprintk("%s: ok success on %02X, client on %02X\n", __FUNCTION__, llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap); frees: @@ -671,56 +648,167 @@ out: * llc_ui_recvmsg - copy received data to the socket user. * @sock: Socket to copy data from. * @msg: Various user space related information. - * @size: Size of user buffer. + * @len: Size of user buffer. * @flags: User specified flags. * * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) + struct msghdr *msg, size_t len, int flags) { - struct sock *sk = sock->sk; struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; - struct sk_buff *skb; + const int nonblock = flags & MSG_DONTWAIT; + struct sk_buff *skb = NULL; + struct sock *sk = sock->sk; + struct llc_sock *llc = llc_sk(sk); size_t copied = 0; - int rc = -ENOMEM, timeout; - int noblock = flags & MSG_DONTWAIT; + u32 peek_seq = 0; + u32 *seq; + unsigned long used; + int target; /* Read at least this many bytes */ + long timeo; - dprintk("%s: receiving in %02X from %02X\n", __FUNCTION__, - llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); lock_sock(sk); - timeout = sock_rcvtimeo(sk, noblock); - rc = llc_ui_wait_for_data(sk, timeout); - if (rc) { - dprintk("%s: llc_ui_wait_for_data failed recv " - "in %02X from %02X\n", __FUNCTION__, - llc_sk(sk)->laddr.lsap, llc_sk(sk)->daddr.lsap); + copied = -ENOTCONN; + if (sk->sk_state == TCP_LISTEN) goto out; - } - skb = skb_dequeue(&sk->sk_receive_queue); - if (!skb) /* shutdown */ - goto out; - copied = skb->len; - if (copied > size) - copied = size; - rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (rc) - goto dgram_free; - if (skb->len > copied) { - skb_pull(skb, copied); - skb_queue_head(&sk->sk_receive_queue, skb); - } - if (uaddr) - memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); - msg->msg_namelen = sizeof(*uaddr); - if (!skb->next) { -dgram_free: - kfree_skb(skb); - } + + timeo = sock_rcvtimeo(sk, nonblock); + + seq = &llc->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = llc->copied_seq; + seq = &peek_seq; + } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + copied = 0; + + do { + u32 offset; + + /* + * We need to check signals first, to get correct SIGURG + * handling. FIXME: Need to check this doesn't impact 1003.1g + * and move it down to the bottom of the loop + */ + if (signal_pending(current)) { + if (copied) + break; + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + + /* Next get a buffer. */ + + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + offset = *seq; + goto found_ok_skb; + } + /* Well, if we have backlog, try to process it now yet. */ + + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == TCP_CLOSE) { + if (!sock_flag(sk, SOCK_DONE)) { + /* + * This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + if (!timeo) { + copied = -EAGAIN; + break; + } + } + + if (copied >= target) { /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { + if (net_ratelimit()) + printk(KERN_DEBUG "LLC(%s:%d): Application " + "bug, race in MSG_PEEK.\n", + current->comm, current->pid); + peek_seq = llc->copied_seq; + } + continue; + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + int rc = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (rc) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + } + + *seq += used; + copied += used; + len -= used; + + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb); + *seq = 0; + } + } while (len > 0); + + /* + * According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. -ANK + * But... af_llc still doesn't have separate sets of methods for + * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will + * eventually fix this tho :-) -acme + */ + if (sk->sk_type == SOCK_DGRAM) + goto copy_uaddr; out: release_sock(sk); - return rc ? : copied; + return copied; +copy_uaddr: + if (uaddr != NULL && skb != NULL) { + memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); + msg->msg_namelen = sizeof(*uaddr); + } + goto out; } /** @@ -740,7 +828,6 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, struct sockaddr_llc *addr = (struct sockaddr_llc *)msg->msg_name; int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; - struct net_device *dev; struct sk_buff *skb; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; @@ -763,19 +850,17 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, if (rc) goto release; } - dev = llc->dev; - hdrlen = dev->hard_header_len + llc_ui_header_len(sk, addr); + hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > dev->mtu) - size = dev->mtu; + if (size > llc->dev->mtu) + size = llc->dev->mtu; copied = size - hdrlen; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) goto release; - skb->sk = sk; - skb->dev = dev; + skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); rc = memcpy_fromiovec(skb_put(skb, copied), msg->msg_iov, copied); @@ -800,15 +885,13 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); - if (rc) - dprintk("%s: llc_ui_send_data failed: %d\n", __FUNCTION__, rc); out: - if (rc) + if (rc) { kfree_skb(skb); release: - if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __FUNCTION__, llc->laddr.lsap, llc->daddr.lsap, rc); + } release_sock(sk); return rc ? : copied; } @@ -895,7 +978,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, int rc = -EINVAL, opt; lock_sock(sk); - if (level != SOL_LLC || optlen != sizeof(int)) + if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; rc = get_user(opt, (int __user *)optval); if (rc) @@ -915,22 +998,22 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, case LLC_OPT_ACK_TMR_EXP: if (opt > LLC_OPT_MAX_ACK_TMR_EXP) goto out; - llc->ack_timer.expire = opt; + llc->ack_timer.expire = opt * HZ; break; case LLC_OPT_P_TMR_EXP: if (opt > LLC_OPT_MAX_P_TMR_EXP) goto out; - llc->pf_cycle_timer.expire = opt; + llc->pf_cycle_timer.expire = opt * HZ; break; case LLC_OPT_REJ_TMR_EXP: if (opt > LLC_OPT_MAX_REJ_TMR_EXP) goto out; - llc->rej_sent_timer.expire = opt; + llc->rej_sent_timer.expire = opt * HZ; break; case LLC_OPT_BUSY_TMR_EXP: if (opt > LLC_OPT_MAX_BUSY_TMR_EXP) goto out; - llc->busy_state_timer.expire = opt; + llc->busy_state_timer.expire = opt * HZ; break; case LLC_OPT_TX_WIN: if (opt > LLC_OPT_MAX_WIN) @@ -970,7 +1053,7 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, int val = 0, len = 0, rc = -EINVAL; lock_sock(sk); - if (level != SOL_LLC) + if (unlikely(level != SOL_LLC)) goto out; rc = get_user(len, optlen); if (rc) @@ -980,17 +1063,17 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, goto out; switch (optname) { case LLC_OPT_RETRY: - val = llc->n2; break; + val = llc->n2; break; case LLC_OPT_SIZE: - val = llc->n1; break; + val = llc->n1; break; case LLC_OPT_ACK_TMR_EXP: - val = llc->ack_timer.expire; break; + val = llc->ack_timer.expire / HZ; break; case LLC_OPT_P_TMR_EXP: - val = llc->pf_cycle_timer.expire; break; + val = llc->pf_cycle_timer.expire / HZ; break; case LLC_OPT_REJ_TMR_EXP: - val = llc->rej_sent_timer.expire; break; + val = llc->rej_sent_timer.expire / HZ; break; case LLC_OPT_BUSY_TMR_EXP: - val = llc->busy_state_timer.expire; break; + val = llc->busy_state_timer.expire / HZ; break; case LLC_OPT_TX_WIN: val = llc->k; break; case LLC_OPT_RX_WIN: @@ -1034,8 +1117,12 @@ static struct proto_ops llc_ui_ops = { .sendpage = sock_no_sendpage, }; -extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb); -extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb); +static char llc_proc_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; +static char llc_sysctl_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the sysctl entries\n"; +static char llc_sock_err_msg[] __initdata = + KERN_CRIT "LLC: Unable to register the network family\n"; static int __init llc2_init(void) { @@ -1048,13 +1135,28 @@ static int __init llc2_init(void) llc_station_init(); llc_ui_sap_last_autoport = LLC_SAP_DYN_START; rc = llc_proc_init(); - if (rc != 0) + if (rc != 0) { + printk(llc_proc_err_msg); goto out_unregister_llc_proto; - sock_register(&llc_ui_family_ops); + } + rc = llc_sysctl_init(); + if (rc) { + printk(llc_sysctl_err_msg); + goto out_proc; + } + rc = sock_register(&llc_ui_family_ops); + if (rc) { + printk(llc_sock_err_msg); + goto out_sysctl; + } llc_add_pack(LLC_DEST_SAP, llc_sap_handler); llc_add_pack(LLC_DEST_CONN, llc_conn_handler); out: return rc; +out_sysctl: + llc_sysctl_exit(); +out_proc: + llc_proc_exit(); out_unregister_llc_proto: proto_unregister(&llc_proto); goto out; @@ -1067,6 +1169,7 @@ static void __exit llc2_exit(void) llc_remove_pack(LLC_DEST_CONN); sock_unregister(PF_LLC); llc_proc_exit(); + llc_sysctl_exit(); proto_unregister(&llc_proto); } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index b218be4c10ec..b0bcfb1f12dd 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -60,23 +60,10 @@ int llc_conn_ac_clear_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_conn_ind(struct sock *sk, struct sk_buff *skb) { - int rc = -ENOTCONN; - u8 dsap; - struct llc_sap *sap; - - llc_pdu_decode_dsap(skb, &dsap); - sap = llc_sap_find(dsap); - if (sap) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); + struct llc_conn_state_ev *ev = llc_conn_ev(skb); - llc_pdu_decode_sa(skb, llc->daddr.mac); - llc_pdu_decode_da(skb, llc->laddr.mac); - llc->dev = skb->dev; - ev->ind_prim = LLC_CONN_PRIM; - rc = 0; - } - return rc; + ev->ind_prim = LLC_CONN_PRIM; + return 0; } int llc_conn_ac_conn_confirm(struct sock *sk, struct sk_buff *skb) @@ -120,10 +107,8 @@ int llc_conn_ac_disc_ind(struct sock *sk, struct sk_buff *skb) reason = LLC_DISC_REASON_RX_DISC_CMD_PDU; } else if (ev->type == LLC_CONN_EV_TYPE_ACK_TMR) reason = LLC_DISC_REASON_ACK_TMR_EXP; - else { - reason = 0; + else rc = -EINVAL; - } if (!rc) { ev->reason = reason; ev->ind_prim = LLC_DISC_PRIM; @@ -160,9 +145,6 @@ int llc_conn_ac_rst_ind(struct sock *sk, struct sk_buff *skb) LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME) { reason = LLC_RESET_REASON_REMOTE; rc = 0; - } else { - reason = 0; - rc = 1; } break; case LLC_CONN_EV_TYPE_ACK_TMR: @@ -172,8 +154,7 @@ int llc_conn_ac_rst_ind(struct sock *sk, struct sk_buff *skb) if (llc->retry_count > llc->n2) { reason = LLC_RESET_REASON_LOCAL; rc = 0; - } else - rc = 1; + } break; } if (!rc) { @@ -217,18 +198,17 @@ int llc_conn_ac_stop_rej_tmr_if_data_flag_eq_2(struct sock *sk, int llc_conn_ac_send_disc_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_disc_cmd(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); llc_conn_ac_set_p_flag_1(sk, skb); @@ -243,20 +223,19 @@ free: int llc_conn_ac_send_dm_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit; - nskb->dev = llc->dev; llc_pdu_decode_pf_bit(skb, &f_bit); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_dm_rsp(nskb, f_bit); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -270,19 +249,17 @@ free: int llc_conn_ac_send_dm_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_dm_rsp(nskb, f_bit); + llc_pdu_init_as_dm_rsp(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -306,17 +283,16 @@ int llc_conn_ac_send_frmr_rsp_f_set_x(struct sock *sk, struct sk_buff *skb) llc_pdu_decode_pf_bit(skb, &f_bit); else f_bit = 0; - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -330,21 +306,19 @@ free: int llc_conn_ac_resend_frmr_rsp_f_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = (struct llc_pdu_sn *)&llc->rx_pdu_hdr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, + llc_pdu_init_as_frmr_rsp(nskb, pdu, 0, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -360,21 +334,20 @@ int llc_conn_ac_resend_frmr_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) u8 f_bit; int rc = -ENOBUFS; struct sk_buff *nskb; + struct llc_sock *llc = llc_sk(sk); llc_pdu_decode_pf_bit(skb, &f_bit); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_frmr_rsp(nskb, pdu, f_bit, llc->vS, llc->vR, INCORRECT); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -395,7 +368,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -412,7 +385,7 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -429,7 +402,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -451,18 +424,17 @@ int llc_conn_ac_resend_i_xxx_x_set_0_or_send_rr(struct sock *sk, u8 nr; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) + if (likely(!rc)) llc_conn_send_pdu(sk, nskb); else kfree_skb(skb); @@ -487,18 +459,17 @@ int llc_conn_ac_resend_i_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_send_rej_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rej_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -512,19 +483,17 @@ free: int llc_conn_ac_send_rej_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - u8 f_bit = 1; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rej_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -538,19 +507,17 @@ free: int llc_conn_ac_send_rej_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - u8 f_bit = 0; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rej_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rej_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -564,18 +531,17 @@ free: int llc_conn_ac_send_rnr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rnr_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -589,19 +555,17 @@ free: int llc_conn_ac_send_rnr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rnr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -615,19 +579,17 @@ free: int llc_conn_ac_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - u8 f_bit = 0; - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rnr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -645,7 +607,7 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) if (!llc->remote_busy_flag) { llc->remote_busy_flag = 1; mod_timer(&llc->busy_state_timer.timer, - jiffies + llc->busy_state_timer.expire * HZ); + jiffies + llc->busy_state_timer.expire); } return 0; } @@ -653,18 +615,17 @@ int llc_conn_ac_set_remote_busy(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_opt_send_rnr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rnr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -678,18 +639,17 @@ free: int llc_conn_ac_send_rr_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_rr_cmd(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -703,19 +663,18 @@ free: int llc_conn_ac_send_rr_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -729,19 +688,17 @@ free: int llc_conn_ac_send_ack_rsp_f_set_1(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - u8 f_bit = 1; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); - llc_pdu_init_as_rr_rsp(nskb, f_bit, llc->vR); + llc_pdu_init_as_rr_rsp(nskb, 1, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -755,18 +712,17 @@ free: int llc_conn_ac_send_rr_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -780,18 +736,17 @@ free: int llc_conn_ac_send_ack_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, 0, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -815,8 +770,8 @@ void llc_conn_set_p_flag(struct sock *sk, u8 value) int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { struct llc_sap *sap = llc->sap; @@ -824,12 +779,11 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) if (llc->dev->flags & IFF_LOOPBACK) dmac = llc->dev->dev_addr; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_sabme_cmd(nskb, 1); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, dmac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); llc_conn_set_p_flag(sk, 1); @@ -845,11 +799,11 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) { u8 f_bit; int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); llc_pdu_decode_pf_bit(skb, &f_bit); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; nskb->dev = llc->dev; @@ -857,7 +811,7 @@ int llc_conn_ac_send_ua_rsp_f_set_p(struct sock *sk, struct sk_buff *skb) llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_ua_rsp(nskb, f_bit); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -886,7 +840,7 @@ int llc_conn_ac_start_p_timer(struct sock *sk, struct sk_buff *skb) llc_conn_set_p_flag(sk, 1); mod_timer(&llc->pf_cycle_timer.timer, - jiffies + llc->pf_cycle_timer.expire * HZ); + jiffies + llc->pf_cycle_timer.expire); return 0; } @@ -957,7 +911,7 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); - if (!rc) { + if (likely(!rc)) { llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -1001,18 +955,17 @@ static int llc_conn_ac_send_rr_rsp_f_set_ackpf(struct sock *sk, struct sk_buff *skb) { int rc = -ENOBUFS; - struct sk_buff *nskb = llc_alloc_frame(); + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *nskb = llc_alloc_frame(sk, llc->dev); if (nskb) { - struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - nskb->dev = llc->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_S, sap->laddr.lsap, llc->daddr.lsap, LLC_PDU_RSP); llc_pdu_init_as_rr_rsp(nskb, llc->ack_pf, llc->vR); rc = llc_mac_hdr_init(nskb, llc->dev->dev_addr, llc->daddr.mac); - if (rc) + if (unlikely(rc)) goto free; llc_conn_send_pdu(sk, nskb); } @@ -1165,7 +1118,7 @@ int llc_conn_ac_start_ack_timer(struct sock *sk, struct sk_buff *skb) { struct llc_sock *llc = llc_sk(sk); - mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire * HZ); + mod_timer(&llc->ack_timer.timer, jiffies + llc->ack_timer.expire); return 0; } @@ -1174,7 +1127,7 @@ int llc_conn_ac_start_rej_timer(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); mod_timer(&llc->rej_sent_timer.timer, - jiffies + llc->rej_sent_timer.expire * HZ); + jiffies + llc->rej_sent_timer.expire); return 0; } @@ -1185,7 +1138,7 @@ int llc_conn_ac_start_ack_tmr_if_not_running(struct sock *sk, if (!timer_pending(&llc->ack_timer.timer)) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); return 0; } @@ -1233,7 +1186,7 @@ int llc_conn_ac_upd_nr_received(struct sock *sk, struct sk_buff *skb) } if (unacked) mod_timer(&llc->ack_timer.timer, - jiffies + llc->ack_timer.expire * HZ); + jiffies + llc->ack_timer.expire); } else if (llc->failed_data_req) { u8 f_bit; @@ -1354,13 +1307,13 @@ int llc_conn_ac_set_vs_nr(struct sock *sk, struct sk_buff *skb) return 0; } -int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) +static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) { llc_sk(sk)->vS = (llc_sk(sk)->vS + 1) % 128; return 0; } -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) +static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) { struct sock *sk = (struct sock *)timeout_data; struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); @@ -1369,59 +1322,31 @@ void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_P_TMR; + skb_set_owner_r(skb, sk); + ev->type = type; llc_process_tmr_ev(sk, skb); } bh_unlock_sock(sk); } -void llc_conn_busy_tmr_cb(unsigned long timeout_data) +void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) { - struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); +} - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_BUSY_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); +void llc_conn_busy_tmr_cb(unsigned long timeout_data) +{ + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); } void llc_conn_ack_tmr_cb(unsigned long timeout_data) { - struct sock* sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_ACK_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); } void llc_conn_rej_tmr_cb(unsigned long timeout_data) { - struct sock *sk = (struct sock *)timeout_data; - struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); - - bh_lock_sock(sk); - if (skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); - - skb->sk = sk; - ev->type = LLC_CONN_EV_TYPE_REJ_TMR; - llc_process_tmr_ev(sk, skb); - } - bh_unlock_sock(sk); + llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); } int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index d5bdb53a348f..c5deda246614 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -37,6 +37,7 @@ #include <net/llc_conn.h> #include <net/llc_sap.h> #include <net/sock.h> +#include <net/llc_c_ac.h> #include <net/llc_c_ev.h> #include <net/llc_pdu.h> @@ -46,8 +47,6 @@ #define dprintk(args...) #endif -extern u16 llc_circular_between(u8 a, u8 b, u8 c); - /** * llc_util_ns_inside_rx_window - check if sequence number is in rx window * @ns: sequence number of received pdu. @@ -99,7 +98,7 @@ out: int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_CONN_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -107,7 +106,7 @@ int llc_conn_ev_conn_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_DATA_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -115,7 +114,7 @@ int llc_conn_ev_data_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_DISC_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -123,7 +122,7 @@ int llc_conn_ev_disc_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->prim == LLC_RESET_PRIM && ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1; @@ -131,7 +130,7 @@ int llc_conn_ev_rst_req(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_DETECTED ? 0 : 1; @@ -139,7 +138,7 @@ int llc_conn_ev_local_busy_detected(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_local_busy_cleared(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_LOCAL_BUSY_CLEARED ? 0 : 1; @@ -152,7 +151,7 @@ int llc_conn_ev_rx_bad_pdu(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_disc_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_DISC ? 0 : 1; @@ -160,7 +159,7 @@ int llc_conn_ev_rx_disc_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_dm_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_DM ? 0 : 1; @@ -168,7 +167,7 @@ int llc_conn_ev_rx_dm_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_frmr_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_RSP(pdu) == LLC_2_PDU_RSP_FRMR ? 0 : 1; @@ -176,7 +175,7 @@ int llc_conn_ev_rx_frmr_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -186,7 +185,7 @@ int llc_conn_ev_rx_i_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -197,9 +196,9 @@ int llc_conn_ev_rx_i_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_0(pdu) && ns != vr && @@ -209,9 +208,9 @@ int llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && ns != vr && @@ -221,10 +220,11 @@ int llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn * pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); - u16 rc = LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && + const struct llc_pdu_sn * pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); + const u16 rc = LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_I(pdu) && + ns != vr && llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; if (!rc) dprintk("%s: matched, state=%d, ns=%d, vr=%d\n", @@ -234,7 +234,7 @@ int llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -244,7 +244,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && @@ -253,7 +253,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && @@ -263,9 +263,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_0(pdu) && ns != vr && @@ -275,9 +275,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && LLC_I_PF_IS_1(pdu) && ns != vr && @@ -287,9 +287,9 @@ int llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && !llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; @@ -298,10 +298,11 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns(struct sock *sk, int llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vr = llc_sk(sk)->vR; - u8 ns = LLC_I_GET_NS(pdu); - u16 rc = LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && ns != vr && + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vr = llc_sk(sk)->vR; + const u8 ns = LLC_I_GET_NS(pdu); + const u16 rc = LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_I(pdu) && + ns != vr && llc_util_ns_inside_rx_window(ns, vr, llc_sk(sk)->rw) ? 0 : 1; if (!rc) dprintk("%s: matched, state=%d, ns=%d, vr=%d\n", @@ -311,7 +312,7 @@ int llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns(struct sock *sk, int llc_conn_ev_rx_rej_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -320,7 +321,7 @@ int llc_conn_ev_rx_rej_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -329,7 +330,7 @@ int llc_conn_ev_rx_rej_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -338,7 +339,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -347,7 +348,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rej_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PDU_RSP(pdu) == LLC_2_PDU_RSP_REJ ? 0 : 1; @@ -355,7 +356,7 @@ int llc_conn_ev_rx_rej_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -364,7 +365,7 @@ int llc_conn_ev_rx_rnr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -373,7 +374,7 @@ int llc_conn_ev_rx_rnr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -382,7 +383,7 @@ int llc_conn_ev_rx_rnr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rnr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -391,7 +392,7 @@ int llc_conn_ev_rx_rnr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_0(pdu) && @@ -400,7 +401,7 @@ int llc_conn_ev_rx_rr_cmd_pbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_S(pdu) && LLC_S_PF_IS_1(pdu) && @@ -409,7 +410,7 @@ int llc_conn_ev_rx_rr_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && @@ -419,7 +420,7 @@ int llc_conn_ev_rx_rr_rsp_fbit_set_0(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_rr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); return llc_conn_space(sk, skb) && LLC_PDU_IS_RSP(pdu) && LLC_PDU_TYPE_IS_S(pdu) && @@ -429,7 +430,7 @@ int llc_conn_ev_rx_rr_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_sabme_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); return LLC_PDU_IS_CMD(pdu) && LLC_PDU_TYPE_IS_U(pdu) && LLC_U_PDU_CMD(pdu) == LLC_2_PDU_CMD_SABME ? 0 : 1; @@ -446,7 +447,7 @@ int llc_conn_ev_rx_ua_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); if (LLC_PDU_IS_CMD(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { @@ -461,7 +462,7 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); if (LLC_PDU_IS_CMD(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) @@ -477,32 +478,10 @@ int llc_conn_ev_rx_xxx_cmd_pbit_set_x(struct sock *sk, struct sk_buff *skb) return rc; } -int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb) -{ - u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - - if (LLC_PDU_IS_RSP(pdu)) { - if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) { - if (LLC_I_PF_IS_1(pdu)) - rc = 0; - } else if (LLC_PDU_TYPE_IS_U(pdu)) - switch (LLC_U_PDU_RSP(pdu)) { - case LLC_2_PDU_RSP_UA: - case LLC_2_PDU_RSP_DM: - case LLC_2_PDU_RSP_FRMR: - if (LLC_U_PF_IS_1(pdu)) - rc = 0; - break; - } - } - return rc; -} - int llc_conn_ev_rx_xxx_rsp_fbit_set_x(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); if (LLC_PDU_IS_RSP(pdu)) { if (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) @@ -524,9 +503,9 @@ int llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vs = llc_sk(sk)->vS; - u8 nr = LLC_I_GET_NR(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vs = llc_sk(sk)->vS; + const u8 nr = LLC_I_GET_NR(pdu); if (LLC_PDU_IS_CMD(pdu) && (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) && @@ -542,9 +521,9 @@ int llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr(struct sock *sk, struct sk_buff *skb) { u16 rc = 1; - struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); - u8 vs = llc_sk(sk)->vS; - u8 nr = LLC_I_GET_NR(pdu); + const struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); + const u8 vs = llc_sk(sk)->vS; + const u8 nr = LLC_I_GET_NR(pdu); if (LLC_PDU_IS_RSP(pdu) && (LLC_PDU_TYPE_IS_I(pdu) || LLC_PDU_TYPE_IS_S(pdu)) && @@ -563,28 +542,28 @@ int llc_conn_ev_rx_any_frame(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_P_TMR; } int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_ACK_TMR; } int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_REJ_TMR; } int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type != LLC_CONN_EV_TYPE_BUSY_TMR; } @@ -596,7 +575,7 @@ int llc_conn_ev_init_p_f_cycle(struct sock *sk, struct sk_buff *skb) int llc_conn_ev_tx_buffer_full(struct sock *sk, struct sk_buff *skb) { - struct llc_conn_state_ev *ev = llc_conn_ev(skb); + const struct llc_conn_state_ev *ev = llc_conn_ev(skb); return ev->type == LLC_CONN_EV_TYPE_SIMPLE && ev->prim_type == LLC_CONN_EV_TX_BUFF_FULL ? 0 : 1; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 4c644bc70eae..c761c15da421 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -40,6 +40,11 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; +int sysctl_llc2_ack_timeout = LLC2_ACK_TIME * HZ; +int sysctl_llc2_p_timeout = LLC2_P_TIME * HZ; +int sysctl_llc2_rej_timeout = LLC2_REJ_TIME * HZ; +int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; + /** * llc_conn_state_process - sends event to connection state machine * @sk: connection @@ -53,7 +58,7 @@ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { int rc; - struct llc_sock *llc = llc_sk(sk); + struct llc_sock *llc = llc_sk(skb->sk); struct llc_conn_state_ev *ev = llc_conn_ev(skb); /* @@ -63,13 +68,16 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) */ skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; - rc = llc_conn_service(sk, skb); /* sending event to state machine */ - if (rc) { + /* + * Send event to state machine + */ + rc = llc_conn_service(skb->sk, skb); + if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __FUNCTION__); goto out_kfree_skb; } - if (!ev->ind_prim && !ev->cfm_prim) { + if (unlikely(!ev->ind_prim && !ev->cfm_prim)) { /* indicate or confirm not required */ /* XXX this is not very pretty, perhaps we should store * XXX indicate/confirm-needed state in the llc_conn_state_ev @@ -80,13 +88,13 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) goto out_skb_put; } - if (ev->ind_prim && ev->cfm_prim) /* Paranoia */ + if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */ skb_get(skb); switch (ev->ind_prim) { case LLC_DATA_PRIM: - llc_save_primitive(skb, LLC_DATA_PRIM); - if (sock_queue_rcv_skb(sk, skb)) { + llc_save_primitive(sk, skb, LLC_DATA_PRIM); + if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* * shouldn't happen */ @@ -95,13 +103,14 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } break; - case LLC_CONN_PRIM: { - struct sock *parent = skb->sk; - - skb->sk = sk; - skb_queue_tail(&parent->sk_receive_queue, skb); - sk->sk_state_change(parent); - } + case LLC_CONN_PRIM: + /* + * Can't be sock_queue_rcv_skb, because we have to leave the + * skb->sk pointing to the newly created struct sock in + * llc_conn_handler. -acme + */ + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_state_change(sk); break; case LLC_DISC_PRIM: sock_hold(sk); @@ -111,8 +120,8 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_socket->state = SS_UNCONNECTED; sk->sk_state = TCP_CLOSE; if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } } kfree_skb(skb); @@ -465,7 +474,7 @@ static int llc_exec_conn_trans_actions(struct sock *sk, } /** - * llc_lookup_established - Finds connection for the remote/local sap/mac + * __llc_lookup_established - Finds connection for the remote/local sap/mac * @sap: SAP * @daddr: address of remote LLC (MAC + SAP) * @laddr: address of local LLC (MAC + SAP) @@ -473,14 +482,16 @@ static int llc_exec_conn_trans_actions(struct sock *sk, * Search connection list of the SAP and finds connection using the remote * mac, remote sap, local mac, and local sap. Returns pointer for * connection found, %NULL otherwise. + * Caller has to make sure local_bh is disabled. */ -struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, - struct llc_addr *laddr) +static struct sock *__llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -494,10 +505,22 @@ struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +struct sock *llc_lookup_established(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk; + + local_bh_disable(); + sk = __llc_lookup_established(sap, daddr, laddr); + local_bh_enable(); + return sk; +} + /** * llc_lookup_listener - Finds listener for local MAC + SAP * @sap: SAP @@ -506,6 +529,7 @@ found: * Search connection list of the SAP and finds connection listening on * local mac, and local sap. Returns pointer for parent socket found, * %NULL otherwise. + * Caller has to make sure local_bh is disabled. */ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) @@ -513,7 +537,7 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct sock *rc; struct hlist_node *node; - read_lock_bh(&sap->sk_list.lock); + read_lock(&sap->sk_list.lock); sk_for_each(rc, node, &sap->sk_list.list) { struct llc_sock *llc = llc_sk(rc); @@ -527,10 +551,19 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + read_unlock(&sap->sk_list.lock); return rc; } +static struct sock *__llc_lookup(struct llc_sap *sap, + struct llc_addr *daddr, + struct llc_addr *laddr) +{ + struct sock *sk = __llc_lookup_established(sap, daddr, laddr); + + return sk ? : llc_lookup_listener(sap, laddr); +} + /** * llc_data_accept_state - designates if in this state data can be sent. * @state: state of connection. @@ -544,14 +577,14 @@ u8 llc_data_accept_state(u8 state) } /** - * find_next_offset - finds offset for next category of transitions + * llc_find_next_offset - finds offset for next category of transitions * @state: state table. * @offset: start offset. * * Finds offset of next category of transitions in transition table. * Returns the start index of next category. */ -static u16 find_next_offset(struct llc_conn_state *state, u16 offset) +static u16 __init llc_find_next_offset(struct llc_conn_state *state, u16 offset) { u16 cnt = 0; struct llc_conn_state_trans **next_trans; @@ -578,8 +611,8 @@ void __init llc_build_offset_table(void) next_offset = 0; for (ev_type = 0; ev_type < NBR_CONN_EV; ev_type++) { llc_offset_table[state][ev_type] = next_offset; - next_offset += find_next_offset(curr_state, - next_offset) + 1; + next_offset += llc_find_next_offset(curr_state, + next_offset) + 1; } } } @@ -623,6 +656,7 @@ static int llc_find_offset(int state, int ev_type) */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { + llc_sap_hold(sap); write_lock_bh(&sap->sk_list.lock); llc_sk(sk)->sap = sap; sk_add_node(sk, &sap->sk_list.list); @@ -642,6 +676,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) write_lock_bh(&sap->sk_list.lock); sk_del_node_init(sk); write_unlock_bh(&sap->sk_list.lock); + llc_sap_put(sap); } /** @@ -654,15 +689,34 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); - struct llc_sock *llc = llc_sk(sk); - if (!llc->dev) - llc->dev = skb->dev; ev->type = LLC_CONN_EV_TYPE_PDU; ev->reason = 0; return llc_conn_state_process(sk, skb); } +static struct sock *llc_create_incoming_sock(struct sock *sk, + struct net_device *dev, + struct llc_addr *saddr, + struct llc_addr *daddr) +{ + struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + sk->sk_prot); + struct llc_sock *newllc, *llc = llc_sk(sk); + + if (!newsk) + goto out; + newllc = llc_sk(newsk); + memcpy(&newllc->laddr, daddr, sizeof(newllc->laddr)); + memcpy(&newllc->daddr, saddr, sizeof(newllc->daddr)); + newllc->dev = dev; + dev_hold(dev); + llc_sap_add_socket(llc->sap, newsk); + llc_sap_hold(llc->sap); +out: + return newsk; +} + void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr saddr, daddr; @@ -673,35 +727,35 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_da(skb, daddr.mac); llc_pdu_decode_dsap(skb, &daddr.lsap); - sk = llc_lookup_established(sap, &saddr, &daddr); - if (!sk) { + sk = __llc_lookup(sap, &saddr, &daddr); + if (!sk) + goto drop; + + bh_lock_sock(sk); + /* + * This has to be done here and not at the upper layer ->accept + * method because of the way the PROCOM state machine works: + * it needs to set several state variables (see, for instance, + * llc_adm_actions_2 in net/llc/llc_c_st.c) and send a packet to + * the originator of the new connection, and this state has to be + * in the newly created struct sock private area. -acme + */ + if (unlikely(sk->sk_state == TCP_LISTEN)) { + struct sock *newsk = llc_create_incoming_sock(sk, skb->dev, + &saddr, &daddr); + if (!newsk) + goto drop_unlock; + skb_set_owner_r(skb, newsk); + } else { /* - * Didn't find an active connection; verify if there - * is a listening socket for this llc addr + * Can't be skb_set_owner_r, this will be done at the + * llc_conn_state_process function, later on, when we will use + * skb_queue_rcv_skb to send it to upper layers, this is + * another trick required to cope with how the PROCOM state + * machine works. -acme */ - struct llc_sock *llc; - struct sock *parent = llc_lookup_listener(sap, &daddr); - - if (!parent) { - dprintk("llc_lookup_listener failed!\n"); - goto drop; - } - - sk = llc_sk_alloc(parent->sk_family, GFP_ATOMIC, parent->sk_prot); - if (!sk) { - sock_put(parent); - goto drop; - } - llc = llc_sk(sk); - memcpy(&llc->laddr, &daddr, sizeof(llc->laddr)); - memcpy(&llc->daddr, &saddr, sizeof(llc->daddr)); - llc_sap_add_socket(sap, sk); - sock_hold(sk); - sock_put(parent); - skb->sk = parent; - } else skb->sk = sk; - bh_lock_sock(sk); + } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); else { @@ -709,11 +763,16 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) llc_set_backlog_type(skb, LLC_PACKET); sk_add_backlog(sk, skb); } +out: bh_unlock_sock(sk); sock_put(sk); return; drop: kfree_skb(skb); + return; +drop_unlock: + kfree_skb(skb); + goto out; } #undef LLC_REFCNT_DEBUG @@ -722,32 +781,6 @@ static atomic_t llc_sock_nr; #endif /** - * llc_release_sockets - releases all sockets in a sap - * @sap: sap to release its sockets - * - * Releases all connections of a sap. Returns 0 if all actions complete - * successfully, nonzero otherwise - */ -int llc_release_sockets(struct llc_sap *sap) -{ - int rc = 0; - struct sock *sk; - struct hlist_node *node; - - write_lock_bh(&sap->sk_list.lock); - - sk_for_each(sk, node, &sap->sk_list.list) { - llc_sk(sk)->state = LLC_CONN_STATE_TEMP; - - if (llc_send_disc(sk)) - rc = 1; - } - - write_unlock_bh(&sap->sk_list.lock); - return rc; -} - -/** * llc_backlog_rcv - Processes rx frames and expired timers. * @sk: LLC sock (p8022 connection) * @skb: queued rx frame or event @@ -762,14 +795,14 @@ static int llc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int rc = 0; struct llc_sock *llc = llc_sk(sk); - if (llc_backlog_type(skb) == LLC_PACKET) { - if (llc->state > 1) /* not closed */ + if (likely(llc_backlog_type(skb) == LLC_PACKET)) { + if (likely(llc->state > 1)) /* not closed */ rc = llc_conn_rcv(sk, skb); else goto out_kfree_skb; } else if (llc_backlog_type(skb) == LLC_EVENT) { /* timer expiration event */ - if (llc->state > 1) /* not closed */ + if (likely(llc->state > 1)) /* not closed */ rc = llc_conn_state_process(sk, skb); else goto out_kfree_skb; @@ -799,22 +832,22 @@ static void llc_sk_init(struct sock* sk) llc->dec_step = llc->connect_step = 1; init_timer(&llc->ack_timer.timer); - llc->ack_timer.expire = LLC_ACK_TIME; + llc->ack_timer.expire = sysctl_llc2_ack_timeout; llc->ack_timer.timer.data = (unsigned long)sk; llc->ack_timer.timer.function = llc_conn_ack_tmr_cb; init_timer(&llc->pf_cycle_timer.timer); - llc->pf_cycle_timer.expire = LLC_P_TIME; + llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; llc->pf_cycle_timer.timer.data = (unsigned long)sk; llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb; init_timer(&llc->rej_sent_timer.timer); - llc->rej_sent_timer.expire = LLC_REJ_TIME; + llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; llc->rej_sent_timer.timer.data = (unsigned long)sk; llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb; init_timer(&llc->busy_state_timer.timer); - llc->busy_state_timer.expire = LLC_BUSY_TIME; + llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->busy_state_timer.timer.data = (unsigned long)sk; llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb; @@ -834,7 +867,7 @@ static void llc_sk_init(struct sock* sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, int priority, struct proto *prot) +struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) { struct sock *sk = sk_alloc(family, priority, prot, 1); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 9727455bf0e7..ab0fcd32fd84 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -40,6 +40,7 @@ static struct llc_sap *llc_sap_alloc(void) sap->state = LLC_SAP_STATE_ACTIVE; memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); rwlock_init(&sap->sk_list.lock); + atomic_set(&sap->refcnt, 1); } return sap; } @@ -52,9 +53,7 @@ static struct llc_sap *llc_sap_alloc(void) */ static void llc_add_sap(struct llc_sap *sap) { - write_lock_bh(&llc_sap_list_lock); list_add_tail(&sap->node, &llc_sap_list); - write_unlock_bh(&llc_sap_list_lock); } /** @@ -70,11 +69,25 @@ static void llc_del_sap(struct llc_sap *sap) write_unlock_bh(&llc_sap_list_lock); } +static struct llc_sap *__llc_sap_find(unsigned char sap_value) +{ + struct llc_sap* sap; + + list_for_each_entry(sap, &llc_sap_list, node) + if (sap->laddr.lsap == sap_value) + goto out; + sap = NULL; +out: + return sap; +} + /** * llc_sap_find - searchs a SAP in station * @sap_value: sap to be found * * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * If the sap is found it will be refcounted and the user will have to do + * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) @@ -82,11 +95,9 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) struct llc_sap* sap; read_lock_bh(&llc_sap_list_lock); - list_for_each_entry(sap, &llc_sap_list, node) - if (sap->laddr.lsap == sap_value) - goto out; - sap = NULL; -out: + sap = __llc_sap_find(sap_value); + if (sap) + llc_sap_hold(sap); read_unlock_bh(&llc_sap_list_lock); return sap; } @@ -106,19 +117,20 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct packet_type *pt, struct net_device *orig_dev)) { - struct llc_sap *sap = llc_sap_find(lsap); + struct llc_sap *sap = NULL; - if (sap) { /* SAP already exists */ - sap = NULL; + write_lock_bh(&llc_sap_list_lock); + if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; - } sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; + llc_sap_hold(sap); llc_add_sap(sap); out: + write_unlock_bh(&llc_sap_list_lock); return sap; } diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f84f66018e4..ba90f7f0801a 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -47,14 +47,11 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) int rc = -ECONNABORTED; struct llc_sock *llc = llc_sk(sk); - if (llc->state == LLC_CONN_STATE_ADM) + if (unlikely(llc->state == LLC_CONN_STATE_ADM)) goto out; rc = -EBUSY; - if (llc_data_accept_state(llc->state)) { /* data_conn_refuse */ - llc->failed_data_req = 1; - goto out; - } - if (llc->p_flag) { + if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ + llc->p_flag)) { llc->failed_data_req = 1; goto out; } @@ -110,6 +107,7 @@ int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap) ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_CONN_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; + skb_set_owner_w(skb, sk); rc = llc_conn_state_process(sk, skb); } out_put: @@ -144,6 +142,7 @@ int llc_send_disc(struct sock *sk) skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; + skb_set_owner_w(skb, sk); sk->sk_state = TCP_CLOSING; ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 13b46240b7a1..8f3addf0724c 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -99,15 +99,19 @@ out: static inline int llc_fixup_skb(struct sk_buff *skb) { u8 llc_len = 2; - struct llc_pdu_sn *pdu; + struct llc_pdu_un *pdu; - if (!pskb_may_pull(skb, sizeof(*pdu))) + if (unlikely(!pskb_may_pull(skb, sizeof(*pdu)))) return 0; - pdu = (struct llc_pdu_sn *)skb->data; + pdu = (struct llc_pdu_un *)skb->data; if ((pdu->ctrl_1 & LLC_PDU_TYPE_MASK) == LLC_PDU_TYPE_U) llc_len = 1; llc_len += 2; + + if (unlikely(!pskb_may_pull(skb, llc_len))) + return 0; + skb->h.raw += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { @@ -166,17 +170,22 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ if (sap->rcv_func) { sap->rcv_func(skb, dev, pt, orig_dev); - goto out; + goto out_put; } dest = llc_pdu_type(skb); if (unlikely(!dest || !llc_type_handlers[dest - 1])) - goto drop; + goto drop_put; llc_type_handlers[dest - 1](sap, skb); +out_put: + llc_sap_put(sap); out: return 0; drop: kfree_skb(skb); goto out; +drop_put: + kfree_skb(skb); + goto out_put; handle_station: if (!llc_station_handler) goto drop; diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index ab5784cf163e..b4d55b6abb67 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -98,7 +98,7 @@ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, dsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 36e8db3fa1a2..bd531cb235a7 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,7 +134,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc), + atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index ed8ba7de6122..bb3580fb8cfe 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,7 +58,7 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -81,7 +81,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -103,15 +103,14 @@ int llc_sap_action_send_xid_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; @@ -135,7 +134,7 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(skb); return rc; } @@ -149,15 +148,14 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); - nskb = llc_alloc_frame(); + nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; - nskb->dev = skb->dev; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, mac_sa, mac_da); - if (!rc) + if (likely(!rc)) rc = dev_queue_xmit(nskb); out: return rc; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 34228ef14985..4029ceee9b91 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -26,11 +26,12 @@ /** * llc_alloc_frame - allocates sk_buff for frame + * @dev: network device this skb will be sent over * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ -struct sk_buff *llc_alloc_frame(void) +struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev) { struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); @@ -38,18 +39,23 @@ struct sk_buff *llc_alloc_frame(void) skb_reserve(skb, 50); skb->nh.raw = skb->h.raw = skb->data; skb->protocol = htons(ETH_P_802_2); - skb->dev = dev_base->next; + skb->dev = dev; skb->mac.raw = skb->head; + if (sk != NULL) + skb_set_owner_w(skb, sk); } return skb; } -void llc_save_primitive(struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { - struct sockaddr_llc *addr = llc_ui_skb_cb(skb); + struct sockaddr_llc *addr; + if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ + return; /* save primitive for use by the user. */ - addr->sllc_family = skb->sk->sk_family; + addr = llc_ui_skb_cb(skb); + addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; @@ -189,7 +195,7 @@ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) if (skb->sk->sk_state == TCP_LISTEN) kfree_skb(skb); else { - llc_save_primitive(skb, ev->prim); + llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb)) @@ -308,7 +314,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb->sk = sk; + skb_set_owner_r(skb, sk); llc_sap_rcv(sap, skb); sock_put(sk); } else diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 8fe48a24bad5..f37dbf8ef126 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -50,6 +50,10 @@ struct llc_station { struct sk_buff_head mac_pdu_q; }; +#define LLC_STATION_ACK_TIME (3 * HZ) + +int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME; + /* Types of events (possible values in 'ev->type') */ #define LLC_STATION_EV_TYPE_SIMPLE 1 #define LLC_STATION_EV_TYPE_CONDITION 2 @@ -218,7 +222,8 @@ static void llc_station_send_pdu(struct sk_buff *skb) static int llc_station_ac_start_ack_timer(struct sk_buff *skb) { - mod_timer(&llc_main_station.ack_timer, jiffies + LLC_ACK_TIME * HZ); + mod_timer(&llc_main_station.ack_timer, + jiffies + sysctl_llc_station_ack_timeout); return 0; } @@ -249,14 +254,14 @@ static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb) static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) { int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, llc_station_mac_sa); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: @@ -270,18 +275,17 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff* nskb = llc_alloc_frame(); + struct sk_buff* nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_xid_rsp(nskb, LLC_XID_NULL_CLASS_2, 127); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: @@ -295,18 +299,17 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) { u8 mac_da[ETH_ALEN], dsap; int rc = 1; - struct sk_buff *nskb = llc_alloc_frame(); + struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev); if (!nskb) goto out; rc = 0; - nskb->dev = skb->dev; llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_ssap(skb, &dsap); llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP); llc_pdu_init_as_test_rsp(nskb, skb); rc = llc_mac_hdr_init(nskb, llc_station_mac_sa, mac_da); - if (rc) + if (unlikely(rc)) goto free; llc_station_send_pdu(nskb); out: @@ -689,7 +692,8 @@ int __init llc_station_init(void) init_timer(&llc_main_station.ack_timer); llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; llc_main_station.ack_timer.function = llc_station_ack_tmr_cb; - + llc_main_station.ack_timer.expires = jiffies + + sysctl_llc_station_ack_timeout; skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; @@ -697,7 +701,6 @@ int __init llc_station_init(void) llc_set_station_handler(llc_station_rcv); ev = llc_station_ev(skb); memset(ev, 0, sizeof(*ev)); - llc_main_station.ack_timer.expires = jiffies + 3 * HZ; llc_main_station.maximum_retry = 1; llc_main_station.state = LLC_STATION_STATE_DOWN; ev->type = LLC_STATION_EV_TYPE_SIMPLE; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c new file mode 100644 index 000000000000..d1eaddb13633 --- /dev/null +++ b/net/llc/sysctl_net_llc.c @@ -0,0 +1,131 @@ +/* + * sysctl_net_llc.c: sysctl interface to LLC net subsystem. + * + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/sysctl.h> +#include <net/llc.h> + +#ifndef CONFIG_SYSCTL +#error This file should not be compiled without CONFIG_SYSCTL defined +#endif + +static struct ctl_table llc2_timeout_table[] = { + { + .ctl_name = NET_LLC2_ACK_TIMEOUT, + .procname = "ack", + .data = &sysctl_llc2_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_BUSY_TIMEOUT, + .procname = "busy", + .data = &sysctl_llc2_busy_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_P_TIMEOUT, + .procname = "p", + .data = &sysctl_llc2_p_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_LLC2_REJ_TIMEOUT, + .procname = "rej", + .data = &sysctl_llc2_rej_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc_station_table[] = { + { + .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, + .procname = "ack_timeout", + .data = &sysctl_llc_station_ack_timeout, + .maxlen = sizeof(long), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { 0 }, +}; + +static struct ctl_table llc2_dir_timeout_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "timeout", + .mode = 0555, + .child = llc2_timeout_table, + }, + { 0 }, +}; + +static struct ctl_table llc_table[] = { + { + .ctl_name = NET_LLC2, + .procname = "llc2", + .mode = 0555, + .child = llc2_dir_timeout_table, + }, + { + .ctl_name = NET_LLC_STATION, + .procname = "station", + .mode = 0555, + .child = llc_station_table, + }, + { 0 }, +}; + +static struct ctl_table llc_dir_table[] = { + { + .ctl_name = NET_LLC, + .procname = "llc", + .mode = 0555, + .child = llc_table, + }, + { 0 }, +}; + +static struct ctl_table llc_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = llc_dir_table, + }, + { 0 }, +}; + +static struct ctl_table_header *llc_table_header; + +int __init llc_sysctl_init(void) +{ + llc_table_header = register_sysctl_table(llc_root_table, 1); + + return llc_table_header ? 0 : -ENOMEM; +} + +void llc_sysctl_exit(void) +{ + if (llc_table_header) { + unregister_sysctl_table(llc_table_header); + llc_table_header = NULL; + } +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 49a3900e3d32..4bc27a6334c1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -133,7 +133,7 @@ int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) memset(tb, 0, sizeof(struct nfattr *) * maxattr); while (NFA_OK(nfa, len)) { - unsigned flavor = nfa->nfa_type; + unsigned flavor = NFA_TYPE(nfa); if (flavor && flavor <= maxattr) tb[flavor-1] = nfa; nfa = NFA_NEXT(nfa, len); @@ -177,7 +177,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); while (NFA_OK(attr, attrlen)) { - unsigned flavor = attr->nfa_type; + unsigned flavor = NFA_TYPE(attr); if (flavor) { if (flavor > attr_count) return -EINVAL; @@ -195,7 +195,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { - int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; + gfp_t allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; int err = 0; NETLINK_CB(skb).dst_group = group; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ff5601ceedcb..efcd10f996ba 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -494,8 +494,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); + ts.sec = cpu_to_be64(skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f81fe8c52e99..eaa44c49567b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -492,8 +492,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec); - ts.usec = cpu_to_be64(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); + ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); + ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a64e1d5ce3ca..678c3f2c0d0b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -758,7 +758,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) } static inline struct sk_buff *netlink_trim(struct sk_buff *skb, - unsigned int __nocast allocation) + gfp_t allocation) { int delta; @@ -880,7 +880,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, unsigned int __nocast allocation) + u32 group, gfp_t allocation) { struct netlink_broadcast_data info; struct hlist_node *node; diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 4e66eef9a034..509afddae569 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -58,7 +58,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) /* Spoof incoming device */ skb->dev = dev; - skb->h.raw = skb->data; + skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8690f171c1ef..499ae3df4a44 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -36,6 +36,11 @@ * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. + * Eric Biederman : Allow for > 8 byte hardware addresses. + * The convention is that longer addresses + * will simply extend the hardware address + * byte arrays at the end of sockaddr_ll + * and packet_mreq. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,7 +166,17 @@ struct packet_mclist int count; unsigned short type; unsigned short alen; - unsigned char addr[8]; + unsigned char addr[MAX_ADDR_LEN]; +}; +/* identical to struct packet_mreq except it has + * a longer address field. + */ +struct packet_mreq_max +{ + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[MAX_ADDR_LEN]; }; #endif #ifdef CONFIG_PACKET_MMAP @@ -639,8 +654,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; - h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; + h->tp_sec = skb->tstamp.off_sec; + h->tp_usec = skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; @@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; + if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) + goto out; ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; @@ -1045,6 +1062,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; + struct sockaddr_ll *sll; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1057,16 +1075,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, #endif /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sizeof(struct sockaddr_ll); - - /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. @@ -1087,6 +1095,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; /* + * If the address length field is there to be filled in, we fill + * it in now. + */ + + sll = (struct sockaddr_ll*)skb->cb; + if (sock->type == SOCK_PACKET) + msg->msg_namelen = sizeof(struct sockaddr_pkt); + else + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + + /* * You lose any data beyond the buffer you gave. If it worries a * user program they can ask the device for its MTU anyway. */ @@ -1166,7 +1185,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } - *uaddr_len = sizeof(*sll); + *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; } @@ -1199,7 +1218,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i } } -static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; @@ -1249,7 +1268,7 @@ done: return err; } -static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; @@ -1315,11 +1334,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { - struct packet_mreq mreq; - if (optlen<sizeof(mreq)) + struct packet_mreq_max mreq; + int len = optlen; + memset(&mreq, 0, sizeof(mreq)); + if (len < sizeof(struct packet_mreq)) return -EINVAL; - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (len > sizeof(mreq)) + len = sizeof(mreq); + if (copy_from_user(&mreq,optval,len)) return -EFAULT; + if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) + return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5acb1680524a..829fdbc4400b 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1472,22 +1472,25 @@ static const char banner[] = KERN_INFO "F6FBB/G4KLX ROSE for Linux. Version 0.62 static int __init rose_proto_init(void) { int i; - int rc = proto_register(&rose_proto, 0); + int rc; + if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { + printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); + rc = -EINVAL; + goto out; + } + + rc = proto_register(&rose_proto, 0); if (rc != 0) goto out; rose_callsign = null_ax25_address; - if (rose_ndevs > 0x7FFFFFFF/sizeof(struct net_device *)) { - printk(KERN_ERR "ROSE: rose_proto_init - rose_ndevs parameter to large\n"); - return -1; - } - dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); - return -1; + rc = -ENOMEM; + goto out_proto_unregister; } memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*)); @@ -1500,10 +1503,12 @@ static int __init rose_proto_init(void) name, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); + rc = -ENOMEM; goto fail; } - if (register_netdev(dev)) { - printk(KERN_ERR "ROSE: netdevice regeistration failed\n"); + rc = register_netdev(dev); + if (rc) { + printk(KERN_ERR "ROSE: netdevice registration failed\n"); free_netdev(dev); goto fail; } @@ -1536,8 +1541,9 @@ fail: free_netdev(dev_rose[i]); } kfree(dev_rose); +out_proto_unregister: proto_unregister(&rose_proto); - return -ENOMEM; + goto out; } module_init(rose_proto_init); diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index 5cfd4cadee42..c4aeb7d40266 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1923,7 +1923,7 @@ int rxrpc_call_write_data(struct rxrpc_call *call, size_t sioc, struct kvec *siov, u8 rxhdr_flags, - int alloc_flags, + gfp_t alloc_flags, int dup_data, size_t *size_sent) { diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index 61463c74f8cc..2ba14a75dbbe 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -522,7 +522,7 @@ int rxrpc_conn_newmsg(struct rxrpc_connection *conn, uint8_t type, int dcount, struct kvec diov[], - int alloc_flags, + gfp_t alloc_flags, struct rxrpc_message **_msg) { struct rxrpc_message *msg; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 45d3bc0812c8..81510da31792 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -72,9 +72,11 @@ config NET_SCH_CLK_GETTIMEOFDAY Choose this if you need a high resolution clock source but can't use the CPU's cycle counter. +# don't allow on SMP x86 because they can have unsynchronized TSCs. +# gettimeofday is a good alternative config NET_SCH_CLK_CPU bool "CPU cycle counter" - depends on X86_TSC || X86_64 || ALPHA || SPARC64 || PPC64 || IA64 + depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 help Say Y here if you want to use the CPU's cycle counter as clock source. This is a cheap and high resolution clock source, but on some diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 00eae5f9a01a..cf68a59fdc5a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -393,10 +393,10 @@ META_COLLECTOR(int_sk_route_caps) dst->value = skb->sk->sk_route_caps; } -META_COLLECTOR(int_sk_hashent) +META_COLLECTOR(int_sk_hash) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_hashent; + dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) @@ -515,7 +515,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), - [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5b24ae0650d3..12b0f582a66b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp; int i; @@ -273,7 +273,7 @@ fail_init: struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, sctp_scope_t scope, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; @@ -479,7 +479,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Add a transport address to an association. */ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const union sctp_addr *addr, - const unsigned int __nocast gfp, + const gfp_t gfp, const int peer_state) { struct sctp_transport *peer; @@ -1231,7 +1231,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) * local endpoint and the remote peer. */ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_scope_t scope; int flags; @@ -1254,7 +1254,7 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, /* Build the association's bind address list from the cookie. */ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, struct sctp_cookie *cookie, - unsigned int __nocast gfp) + gfp_t gfp) { int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length); int var_size3 = cookie->raw_addr_list_len; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f71549710f2e..2b962627f631 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -53,7 +53,7 @@ /* Forward declarations for internal helpers. */ static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -64,7 +64,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); */ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { struct sctp_sockaddr_entry *addr; @@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -200,7 +200,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) */ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_params addrparms; union sctp_params retval; @@ -252,7 +252,7 @@ end_raw: * address parameters). */ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, - int addrs_len, __u16 port, unsigned int __nocast gfp) + int addrs_len, __u16 port, gfp_t gfp) { union sctp_addr_param *rawaddr; struct sctp_paramhdr *param; @@ -350,7 +350,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, /* Copy out addresses from the global local address list. */ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, union sctp_addr *addr, - sctp_scope_t scope, unsigned int __nocast gfp, + sctp_scope_t scope, gfp_t gfp, int flags) { int error = 0; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 61da2937e641..83ef411772f4 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -62,7 +62,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(unsigned int __nocast gfp) +SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index e22ccd655965..96984f7a2d69 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -68,7 +68,7 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep); */ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_sock *sp = sctp_sk(sk); memset(ep, 0, sizeof(struct sctp_endpoint)); @@ -138,8 +138,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Create a sctp_endpoint with all that boring stuff initialized. * Returns NULL if there isn't enough memory. */ -struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, - unsigned int __nocast gfp) +struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) { struct sctp_endpoint *ep; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e7025be77691..26de4d3e1bd9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -147,7 +147,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, struct sctp_sockaddr_entry *addr; rcu_read_lock(); - if ((in_dev = __in_dev_get(dev)) == NULL) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { rcu_read_unlock(); return; } @@ -219,7 +219,7 @@ static void sctp_free_local_addr_list(void) /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - unsigned int __nocast gfp, int copy_flags) + gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3868a8d70cc0..10e82ec2ebd3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -78,7 +78,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp); + gfp_t gfp); /* What was the inbound interface for this chunk? */ int sctp_chunk_iif(const struct sctp_chunk *chunk) @@ -174,7 +174,7 @@ void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code, */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, - unsigned int __nocast gfp, int vparam_len) + gfp_t gfp, int vparam_len) { sctp_inithdr_t init; union sctp_params addrs; @@ -261,7 +261,7 @@ nodata: struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, - unsigned int __nocast gfp, int unkparam_len) + gfp_t gfp, int unkparam_len) { sctp_inithdr_t initack; struct sctp_chunk *retval; @@ -1234,7 +1234,7 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk) /* Create a CLOSED association to use with an incoming packet. */ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; struct sk_buff *skb; @@ -1349,7 +1349,7 @@ nodata: struct sctp_association *sctp_unpack_cookie( const struct sctp_endpoint *ep, const struct sctp_association *asoc, - struct sctp_chunk *chunk, unsigned int __nocast gfp, + struct sctp_chunk *chunk, gfp_t gfp, int *error, struct sctp_chunk **errp) { struct sctp_association *retval = NULL; @@ -1814,7 +1814,7 @@ int sctp_verify_init(const struct sctp_association *asoc, */ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, const union sctp_addr *peer_addr, - sctp_init_chunk_t *peer_init, unsigned int __nocast gfp) + sctp_init_chunk_t *peer_init, gfp_t gfp) { union sctp_params param; struct sctp_transport *transport; @@ -1985,7 +1985,7 @@ nomem: static int sctp_process_param(struct sctp_association *asoc, union sctp_params param, const union sctp_addr *peer_addr, - unsigned int __nocast gfp) + gfp_t gfp) { union sctp_addr addr; int i; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 39c970b5b198..f84173ea8ec1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, @@ -71,7 +71,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp); + gfp_t gfp); /******************************************************************** * Helper functions @@ -498,7 +498,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_init_chunk_t *peer_init, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -853,7 +853,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, - unsigned int __nocast gfp) + gfp_t gfp) { sctp_cmd_seq_t commands; const sctp_sm_table_entry_t *state_fn; @@ -898,7 +898,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error; @@ -986,7 +986,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, void *event_arg, sctp_disposition_t status, sctp_cmd_seq_t *commands, - unsigned int __nocast gfp) + gfp_t gfp) { int error = 0; int force; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 86073df418f5..505c7de10c50 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2414,6 +2414,17 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. + */ + ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); + if (!ev) { + disposition = SCTP_DISPOSITION_NOMEM; + goto out; + } + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + /* Upon the reception of the SHUTDOWN, the peer endpoint shall * - enter the SHUTDOWN-RECEIVED state, * - stop accepting new data from its SCTP user @@ -2439,17 +2450,6 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack)); - /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. - */ - ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); - if (!ev) { - disposition = SCTP_DISPOSITION_NOMEM; - goto out; - } - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - out: return disposition; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 91ec8c936913..02e068d3450d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3159,8 +3159,9 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len, - char __user *optval, int __user *optlen) +static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { sctp_assoc_t id; struct sctp_association *asoc; @@ -3185,23 +3186,28 @@ static int sctp_getsockopt_peer_addrs_num(struct sock *sk, int len, return cnt; } -static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, - char __user *optval, int __user *optlen) +/* + * Old API for getting list of peer addresses. Does not work for 32-bit + * programs running on a 64-bit kernel + */ +static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_association *asoc; struct list_head *pos; int cnt = 0; - struct sctp_getaddrs getaddrs; + struct sctp_getaddrs_old getaddrs; struct sctp_transport *from; void __user *to; union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - if (len != sizeof(struct sctp_getaddrs)) + if (len != sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -3225,15 +3231,69 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, if (cnt >= getaddrs.addr_num) break; } getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs))) + if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + return -EFAULT; + + return 0; +} + +static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_association *asoc; + struct list_head *pos; + int cnt = 0; + struct sctp_getaddrs getaddrs; + struct sctp_transport *from; + void __user *to; + union sctp_addr temp; + struct sctp_sock *sp = sctp_sk(sk); + int addrlen; + size_t space_left; + int bytes_copied; + + if (len < sizeof(struct sctp_getaddrs)) + return -EINVAL; + + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + return -EFAULT; + + /* For UDP-style sockets, id specifies the association to query. */ + asoc = sctp_id2assoc(sk, getaddrs.assoc_id); + if (!asoc) + return -EINVAL; + + to = optval + offsetof(struct sctp_getaddrs,addrs); + space_left = len - sizeof(struct sctp_getaddrs) - + offsetof(struct sctp_getaddrs,addrs); + + list_for_each(pos, &asoc->peer.transport_addr_list) { + from = list_entry(pos, struct sctp_transport, transports); + memcpy(&temp, &from->ipaddr, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); + addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + if(space_left < addrlen) + return -ENOMEM; + temp.v4.sin_port = htons(temp.v4.sin_port); + if (copy_to_user(to, &temp, addrlen)) + return -EFAULT; + to += addrlen; + cnt++; + space_left -= addrlen; + } + + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) + return -EFAULT; + bytes_copied = ((char __user *)to) - optval; + if (put_user(bytes_copied, optlen)) return -EFAULT; return 0; } -static int sctp_getsockopt_local_addrs_num(struct sock *sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, + char __user *optval, + int __user *optlen) { sctp_assoc_t id; struct sctp_bind_addr *bp; @@ -3306,8 +3366,8 @@ done: /* Helper function that copies local addresses to user and returns the number * of addresses copied. */ -static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, int max_addrs, - void __user *to) +static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, + void __user *to) { struct list_head *pos; struct sctp_sockaddr_entry *addr; @@ -3341,14 +3401,54 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, int max_addrs, return cnt; } -static int sctp_getsockopt_local_addrs(struct sock *sk, int len, - char __user *optval, int __user *optlen) +static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, + void * __user *to, size_t space_left) +{ + struct list_head *pos; + struct sctp_sockaddr_entry *addr; + unsigned long flags; + union sctp_addr temp; + int cnt = 0; + int addrlen; + + sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); + list_for_each(pos, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if ((PF_INET == sk->sk_family) && + (AF_INET6 == addr->a.sa.sa_family)) + continue; + memcpy(&temp, &addr->a, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), + &temp); + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + if(space_left<addrlen) + return -ENOMEM; + temp.v4.sin_port = htons(port); + if (copy_to_user(*to, &temp, addrlen)) { + sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, + flags); + return -EFAULT; + } + *to += addrlen; + cnt ++; + space_left -= addrlen; + } + sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); + + return cnt; +} + +/* Old API for getting list of local addresses. Does not work for 32-bit + * programs running on a 64-bit kernel + */ +static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, + char __user *optval, int __user *optlen) { struct sctp_bind_addr *bp; struct sctp_association *asoc; struct list_head *pos; int cnt = 0; - struct sctp_getaddrs getaddrs; + struct sctp_getaddrs_old getaddrs; struct sctp_sockaddr_entry *addr; void __user *to; union sctp_addr temp; @@ -3357,10 +3457,10 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, rwlock_t *addr_lock; int err = 0; - if (len != sizeof(struct sctp_getaddrs)) + if (len != sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -3392,8 +3492,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - cnt = sctp_copy_laddrs_to_user(sk, bp->port, - getaddrs.addr_num, to); + cnt = sctp_copy_laddrs_to_user_old(sk, bp->port, + getaddrs.addr_num, + to); if (cnt < 0) { err = cnt; goto unlock; @@ -3419,7 +3520,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, copy_getaddrs: getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs))) + if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) err = -EFAULT; unlock: @@ -3427,6 +3528,99 @@ unlock: return err; } +static int sctp_getsockopt_local_addrs(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_bind_addr *bp; + struct sctp_association *asoc; + struct list_head *pos; + int cnt = 0; + struct sctp_getaddrs getaddrs; + struct sctp_sockaddr_entry *addr; + void __user *to; + union sctp_addr temp; + struct sctp_sock *sp = sctp_sk(sk); + int addrlen; + rwlock_t *addr_lock; + int err = 0; + size_t space_left; + int bytes_copied; + + if (len <= sizeof(struct sctp_getaddrs)) + return -EINVAL; + + if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) + return -EFAULT; + + /* + * For UDP-style sockets, id specifies the association to query. + * If the id field is set to the value '0' then the locally bound + * addresses are returned without regard to any particular + * association. + */ + if (0 == getaddrs.assoc_id) { + bp = &sctp_sk(sk)->ep->base.bind_addr; + addr_lock = &sctp_sk(sk)->ep->base.addr_lock; + } else { + asoc = sctp_id2assoc(sk, getaddrs.assoc_id); + if (!asoc) + return -EINVAL; + bp = &asoc->base.bind_addr; + addr_lock = &asoc->base.addr_lock; + } + + to = optval + offsetof(struct sctp_getaddrs,addrs); + space_left = len - sizeof(struct sctp_getaddrs) - + offsetof(struct sctp_getaddrs,addrs); + + sctp_read_lock(addr_lock); + + /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid + * addresses from the global local address list. + */ + if (sctp_list_single_entry(&bp->address_list)) { + addr = list_entry(bp->address_list.next, + struct sctp_sockaddr_entry, list); + if (sctp_is_any(&addr->a)) { + cnt = sctp_copy_laddrs_to_user(sk, bp->port, + &to, space_left); + if (cnt < 0) { + err = cnt; + goto unlock; + } + goto copy_getaddrs; + } + } + + list_for_each(pos, &bp->address_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + memcpy(&temp, &addr->a, sizeof(temp)); + sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; + if(space_left < addrlen) + return -ENOMEM; /*fixme: right error?*/ + temp.v4.sin_port = htons(temp.v4.sin_port); + if (copy_to_user(to, &temp, addrlen)) { + err = -EFAULT; + goto unlock; + } + to += addrlen; + cnt ++; + space_left -= addrlen; + } + +copy_getaddrs: + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) + return -EFAULT; + bytes_copied = ((char __user *)to) - optval; + if (put_user(bytes_copied, optlen)) + return -EFAULT; + +unlock: + sctp_read_unlock(addr_lock); + return err; +} + /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as @@ -3807,12 +4001,20 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM: - retval = sctp_getsockopt_peer_addrs_num(sk, len, optval, + case SCTP_GET_PEER_ADDRS_NUM_OLD: + retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, + optlen); + break; + case SCTP_GET_LOCAL_ADDRS_NUM_OLD: + retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, + optlen); + break; + case SCTP_GET_PEER_ADDRS_OLD: + retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, optlen); break; - case SCTP_GET_LOCAL_ADDRS_NUM: - retval = sctp_getsockopt_local_addrs_num(sk, len, optval, + case SCTP_GET_LOCAL_ADDRS_OLD: + retval = sctp_getsockopt_local_addrs_old(sk, len, optval, optlen); break; case SCTP_GET_PEER_ADDRS: diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 25037daf3fa0..cbe2513d2822 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -58,7 +58,7 @@ static inline size_t sctp_ssnmap_size(__u16 in, __u16 out) * Allocate room to store at least 'len' contiguous TSNs. */ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ssnmap *retval; int size; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d2f04ebe5081..6bc27200e6ca 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -57,7 +57,7 @@ /* Initialize a new transport from provided memory. */ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { /* Copy in the address. */ peer->ipaddr = *addr; @@ -122,7 +122,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Allocate and initialize a new transport. */ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_transport *transport; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 0abd5101107c..057e7fac3af0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -74,7 +74,7 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) /* Create a new sctp_ulpevent. */ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; @@ -136,7 +136,7 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( const struct sctp_association *asoc, __u16 flags, __u16 state, __u16 error, __u16 outbound, - __u16 inbound, unsigned int __nocast gfp) + __u16 inbound, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_assoc_change *sac; @@ -237,7 +237,7 @@ fail: struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( const struct sctp_association *asoc, const struct sockaddr_storage *aaddr, - int flags, int state, int error, unsigned int __nocast gfp) + int flags, int state, int error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_paddr_change *spc; @@ -350,7 +350,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -448,7 +448,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, __u32 error, unsigned int __nocast gfp) + __u16 flags, __u32 error, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_send_failed *ssf; @@ -557,7 +557,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( const struct sctp_association *asoc, - __u16 flags, unsigned int __nocast gfp) + __u16 flags, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_shutdown_event *sse; @@ -620,7 +620,7 @@ fail: * 5.3.1.6 SCTP_ADAPTION_INDICATION */ struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( - const struct sctp_association *asoc, unsigned int __nocast gfp) + const struct sctp_association *asoc, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_adaption_event *sai; @@ -657,7 +657,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event = NULL; struct sk_buff *skb; @@ -719,7 +719,7 @@ fail: */ struct sctp_ulpevent *sctp_ulpevent_make_pdapi( const struct sctp_association *asoc, __u32 indication, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_pdapi_event *pd; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ec2c857eae7f..2080b2d28c98 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -100,7 +100,7 @@ void sctp_ulpq_free(struct sctp_ulpq *ulpq) /* Process an incoming DATA chunk. */ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff_head temp; sctp_data_chunk_t *hdr; @@ -792,7 +792,7 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; @@ -816,7 +816,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, - unsigned int __nocast gfp) + gfp_t gfp) { struct sctp_association *asoc; __u16 needed, freed; @@ -855,7 +855,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ -void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, unsigned int __nocast gfp) +void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; diff --git a/net/socket.c b/net/socket.c index c699e93c33d7..3145103cdf54 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1145,8 +1145,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res if (!try_module_get(net_families[family]->owner)) goto out_release; - if ((err = net_families[family]->create(sock, protocol)) < 0) + if ((err = net_families[family]->create(sock, protocol)) < 0) { + sock->ops = NULL; goto out_module_put; + } + /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. @@ -1360,16 +1363,16 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ newsock->type = sock->type; newsock->ops = sock->ops; - err = security_socket_accept(sock, newsock); - if (err) - goto out_release; - /* * We don't need try_module_get here, as the listening socket (sock) * has the protocol module (sock->ops->owner) held. */ __module_get(newsock->ops->owner); + err = security_socket_accept(sock, newsock); + if (err) + goto out_release; + err = sock->ops->accept(sock, newsock, sock->file->f_flags); if (err < 0) goto out_release; @@ -1700,7 +1703,9 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) struct socket *sock; char address[MAX_SOCK_ADDR]; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; - unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */ + unsigned char ctl[sizeof(struct cmsghdr) + 20] + __attribute__ ((aligned (sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; @@ -1862,7 +1867,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err < 0) goto out_freeiov; } - err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); + err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f3104035e35d..54e60a657500 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -719,7 +719,7 @@ static void rpc_async_schedule(void *arg) void * rpc_malloc(struct rpc_task *task, size_t size) { - int gfp; + gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) gfp = GFP_ATOMIC; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index c5241fcbb966..55538f6b60ff 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -16,6 +16,8 @@ #include <linux/mm.h> #include <linux/sysctl.h> +#include <net/sock.h> + #ifdef CONFIG_INET #include <net/ip.h> #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fda737d77edc..cbb0ba34a600 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -163,7 +163,7 @@ static void xfrm_policy_timer(unsigned long data) if (xp->dead) goto out; - dir = xp->index & 7; + dir = xfrm_policy_id2dir(xp->index); if (xp->lft.hard_add_expires_seconds) { long tmo = xp->lft.hard_add_expires_seconds + @@ -225,7 +225,7 @@ expired: * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(int gfp) +struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) { struct xfrm_policy *policy; @@ -417,7 +417,7 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) { + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) |