diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/8021q/vlan_core.c | 4 | ||||
-rw-r--r-- | net/8021q/vlan_dev.c | 23 | ||||
-rw-r--r-- | net/core/datagram.c | 112 | ||||
-rw-r--r-- | net/core/dev.c | 81 | ||||
-rw-r--r-- | net/core/iovec.c | 33 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 5 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 4 | ||||
-rw-r--r-- | net/iucv/af_iucv.c | 404 | ||||
-rw-r--r-- | net/iucv/iucv.c | 43 | ||||
-rw-r--r-- | net/rds/af_rds.c | 1 | ||||
-rw-r--r-- | net/rds/connection.c | 4 | ||||
-rw-r--r-- | net/rds/ib.c | 4 | ||||
-rw-r--r-- | net/rds/ib.h | 2 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 2 | ||||
-rw-r--r-- | net/rds/ib_ring.c | 2 | ||||
-rw-r--r-- | net/rds/ib_send.c | 10 | ||||
-rw-r--r-- | net/rds/info.c | 5 | ||||
-rw-r--r-- | net/rds/iw.c | 4 | ||||
-rw-r--r-- | net/rds/iw.h | 2 | ||||
-rw-r--r-- | net/rds/iw_recv.c | 2 | ||||
-rw-r--r-- | net/rds/iw_ring.c | 2 | ||||
-rw-r--r-- | net/rds/iw_send.c | 10 | ||||
-rw-r--r-- | net/rds/rdma.c | 7 | ||||
-rw-r--r-- | net/rds/rdma_transport.c | 12 | ||||
-rw-r--r-- | net/rds/rds.h | 2 | ||||
-rw-r--r-- | net/rds/send.c | 10 |
27 files changed, 603 insertions, 189 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c67fe6f75653..7f7de1a04de6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -114,9 +114,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, EXPORT_SYMBOL(vlan_gro_receive); int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct napi_gro_fraginfo *info) + unsigned int vlan_tci) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); + struct sk_buff *skb = napi_frags_skb(napi); if (!skb) return NET_RX_DROP; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b0921364014..04dc8c8a6854 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -666,13 +666,7 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (!real_dev->ethtool_ops || - !real_dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - return real_dev->ethtool_ops->get_settings(real_dev, cmd); + return dev_ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -686,24 +680,13 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (real_dev->ethtool_ops == NULL || - real_dev->ethtool_ops->get_rx_csum == NULL) - return 0; - return real_dev->ethtool_ops->get_rx_csum(real_dev); + return dev_ethtool_get_rx_csum(vlan->real_dev); } static u32 vlan_ethtool_get_flags(struct net_device *dev) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - struct net_device *real_dev = vlan->real_dev; - - if (!(real_dev->features & NETIF_F_HW_VLAN_RX) || - real_dev->ethtool_ops == NULL || - real_dev->ethtool_ops->get_flags == NULL) - return 0; - return real_dev->ethtool_ops->get_flags(real_dev); + return dev_ethtool_get_flags(vlan->real_dev); } static const struct ethtool_ops vlan_ethtool_ops = { diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..914d5fa773b4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -339,17 +339,111 @@ fault: } /** + * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. + * @skb: buffer to copy + * @offset: offset in the buffer to start copying from + * @to: io vector to copy to + * @to_offset: offset in the io vector to start copying to + * @len: amount of data to copy from buffer to iovec + * + * Returns 0 or -EFAULT. + * Note: the iovec is not modified during the copy. + */ +int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, + const struct iovec *to, int to_offset, + int len) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + + /* Copy header. */ + if (copy > 0) { + if (copy > len) + copy = len; + if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + WARN_ON(start > offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + int err; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + + if (copy > len) + copy = len; + vaddr = kmap(page); + err = memcpy_toiovecend(to, vaddr + frag->page_offset + + offset - start, to_offset, copy); + kunmap(page); + if (err) + goto fault; + if (!(len -= copy)) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + WARN_ON(start > offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_datagram_const_iovec(list, + offset - start, + to, to_offset, + copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to_offset += copy; + } + start = end; + } + } + if (!len) + return 0; + +fault: + return -EFAULT; +} +EXPORT_SYMBOL(skb_copy_datagram_const_iovec); + +/** * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. * @skb: buffer to copy * @offset: offset in the buffer to start copying to * @from: io vector to copy to + * @from_offset: offset in the io vector to start copying from * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. - * Note: the iovec is modified during the copy. + * Note: the iovec is not modified during the copy. */ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, - struct iovec *from, int len) + const struct iovec *from, int from_offset, + int len) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -358,11 +452,12 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > 0) { if (copy > len) copy = len; - if (memcpy_fromiovec(skb->data + offset, from, copy)) + if (memcpy_fromiovecend(skb->data + offset, from, 0, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; + from_offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ @@ -381,8 +476,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap(page); - err = memcpy_fromiovec(vaddr + frag->page_offset + - offset - start, from, copy); + err = memcpy_fromiovecend(vaddr + frag->page_offset + + offset - start, + from, from_offset, copy); kunmap(page); if (err) goto fault; @@ -390,6 +486,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, if (!(len -= copy)) return 0; offset += copy; + from_offset += copy; } start = end; } @@ -408,11 +505,14 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, copy = len; if (skb_copy_datagram_from_iovec(list, offset - start, - from, copy)) + from, + from_offset, + copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; + from_offset += copy; } start = end; } diff --git a/net/core/dev.c b/net/core/dev.c index 308a7d0c277f..e48c08af76ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2525,16 +2525,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) } EXPORT_SYMBOL(napi_reuse_skb); -struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, - struct napi_gro_fraginfo *info) +struct sk_buff *napi_get_frags(struct napi_struct *napi) { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - skb_frag_t *frag; - int i; - - napi->skb = NULL; if (!skb) { skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); @@ -2542,47 +2536,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, goto out; skb_reserve(skb, NET_IP_ALIGN); - } - - BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = info->frags; - for (i = 0; i < info->nr_frags; i++) { - skb_fill_page_desc(skb, i, frag->page, frag->page_offset, - frag->size); - frag++; + napi->skb = skb; } - skb_shinfo(skb)->nr_frags = info->nr_frags; - - skb->data_len = info->len; - skb->len += info->len; - skb->truesize += info->len; - - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - eth = skb_gro_header(skb, sizeof(*eth)); - if (!eth) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } - - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - - skb->ip_summed = info->ip_summed; - skb->csum = info->csum; out: return skb; } -EXPORT_SYMBOL(napi_fraginfo_skb); +EXPORT_SYMBOL(napi_get_frags); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { @@ -2612,9 +2573,39 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) } EXPORT_SYMBOL(napi_frags_finish); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +struct sk_buff *napi_frags_skb(struct napi_struct *napi) +{ + struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + + napi->skb = NULL; + + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } + + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; + +out: + return skb; +} +EXPORT_SYMBOL(napi_frags_skb); + +int napi_gro_frags(struct napi_struct *napi) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); + struct sk_buff *skb = napi_frags_skb(napi); if (!skb) return NET_RX_DROP; @@ -2718,7 +2709,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); - kfree_skb(napi->skb); + napi_free_frags(napi); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; diff --git a/net/core/iovec.c b/net/core/iovec.c index 4c9c0121c9da..40a76ce19d9f 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -98,6 +98,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) } /* + * Copy kernel to iovec. Returns -EFAULT on error. + */ + +int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, + int offset, int len) +{ + int copy; + for (; len > 0; ++iov) { + /* Skip over the finished iovecs */ + if (unlikely(offset >= iov->iov_len)) { + offset -= iov->iov_len; + continue; + } + copy = min_t(unsigned int, iov->iov_len - offset, len); + offset = 0; + if (copy_to_user(iov->iov_base, kdata, copy)) + return -EFAULT; + kdata += copy; + len -= copy; + } + + return 0; +} + +/* * Copy iovec to kernel. Returns -EFAULT on error. * * Note: this modifies the original iovec. @@ -122,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) } /* - * For use with ip_build_xmit + * Copy iovec from kernel. Returns -EFAULT on error. */ -int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, - int len) + +int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) { /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { @@ -236,3 +262,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(memcpy_fromiovec); EXPORT_SYMBOL(memcpy_fromiovecend); EXPORT_SYMBOL(memcpy_toiovec); +EXPORT_SYMBOL(memcpy_toiovecend); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7f03373b8c07..170689681aa2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1003,8 +1003,6 @@ void inet_register_protosw(struct inet_protosw *p) out: spin_unlock_bh(&inetsw_lock); - synchronize_net(); - return; out_permanent: diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b35a950d2e06..cd2b97f1b6e1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -161,13 +161,12 @@ static __u16 const msstab[] = { */ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) { - struct tcp_sock *tp = tcp_sk(sk); const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tp->last_synq_overflow = jiffies; + tcp_synq_overflow(sk); /* XXX sort msstab[] by probability? Binary search? */ for (mssind = 0; mss > msstab[mssind + 1]; mssind++) @@ -268,7 +267,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!sysctl_tcp_syncookies || !th->ack) goto out; - if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + if (tcp_synq_no_recent_overflow(sk) || (mss = cookie_check(skb, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 711175e0571f..8c2513982b61 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -131,7 +131,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) int mssind; const __u16 mss = *mssp; - tcp_sk(sk)->last_synq_overflow = jiffies; + tcp_synq_overflow(sk); for (mssind = 0; mss > msstab[mssind + 1]; mssind++) ; @@ -175,7 +175,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!sysctl_tcp_syncookies || !th->ack) goto out; - if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + if (tcp_synq_no_recent_overflow(sk) || (mss = cookie_check(skb, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index b51c9187c347..0fc00087ea8b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -29,10 +29,7 @@ #include <net/iucv/iucv.h> #include <net/iucv/af_iucv.h> -#define CONFIG_IUCV_SOCK_DEBUG 1 - -#define IPRMDATA 0x80 -#define VERSION "1.0" +#define VERSION "1.1" static char iucv_userid[80]; @@ -44,6 +41,19 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +/* special AF_IUCV IPRM messages */ +static const u8 iprm_shutdown[8] = + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; + +#define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) + +/* macros to set/get socket control buffer at correct offset */ +#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ +#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) +#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ +#define CB_TRGCLS_LEN (TRGCLS_SIZE) + + static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -54,6 +64,7 @@ static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); +static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]); static struct iucv_sock_list iucv_sk_list = { .lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock), @@ -65,7 +76,8 @@ static struct iucv_handler af_iucv_handler = { .path_complete = iucv_callback_connack, .path_severed = iucv_callback_connrej, .message_pending = iucv_callback_rx, - .message_complete = iucv_callback_txdone + .message_complete = iucv_callback_txdone, + .path_quiesced = iucv_callback_shutdown, }; static inline void high_nmcpy(unsigned char *dst, char *src) @@ -78,6 +90,37 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } +/** + * iucv_msg_length() - Returns the length of an iucv message. + * @msg: Pointer to struct iucv_message, MUST NOT be NULL + * + * The function returns the length of the specified iucv message @msg of data + * stored in a buffer and of data stored in the parameter list (PRMDATA). + * + * For IUCV_IPRMDATA, AF_IUCV uses the following convention to transport socket + * data: + * PRMDATA[0..6] socket data (max 7 bytes); + * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7]) + * + * The socket data length is computed by substracting the socket data length + * value from 0xFF. + * If the socket data len is greater 7, then PRMDATA can be used for special + * notifications (see iucv_sock_shutdown); and further, + * if the socket data len is > 7, the function returns 8. + * + * Use this function to allocate socket buffers to store iucv message data. + */ +static inline size_t iucv_msg_length(struct iucv_message *msg) +{ + size_t datalen; + + if (msg->flags & IUCV_IPRMDATA) { + datalen = 0xff - msg->rmmsg[7]; + return (datalen < 8) ? datalen : 8; + } + return msg->length; +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -225,6 +268,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->flags = 0; + iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; iucv_sk(sk)->path = NULL; memset(&iucv_sk(sk)->src_user_id , 0, 32); @@ -248,11 +293,22 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - if (sock->type != SOCK_STREAM) - return -ESOCKTNOSUPPORT; + if (protocol && protocol != PF_IUCV) + return -EPROTONOSUPPORT; sock->state = SS_UNCONNECTED; - sock->ops = &iucv_sock_ops; + + switch (sock->type) { + case SOCK_STREAM: + sock->ops = &iucv_sock_ops; + break; + case SOCK_SEQPACKET: + /* currently, proto ops can handle both sk types */ + sock->ops = &iucv_sock_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); if (!sk) @@ -463,11 +519,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) return -EBADFD; - if (sk->sk_type != SOCK_STREAM) + if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) return -EINVAL; - iucv = iucv_sk(sk); - if (sk->sk_state == IUCV_OPEN) { err = iucv_sock_autobind(sk); if (unlikely(err)) @@ -486,8 +540,8 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, iucv = iucv_sk(sk); /* Create path. */ - iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, - IPRMDATA, GFP_KERNEL); + iucv->path = iucv_path_alloc(iucv->msglimit, + IUCV_IPRMDATA, GFP_KERNEL); if (!iucv->path) { err = -ENOMEM; goto done; @@ -521,8 +575,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state == IUCV_DISCONN) { - release_sock(sk); - return -ECONNREFUSED; + err = -ECONNREFUSED; } if (err) { @@ -545,7 +598,10 @@ static int iucv_sock_listen(struct socket *sock, int backlog) lock_sock(sk); err = -EINVAL; - if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM) + if (sk->sk_state != IUCV_BOUND) + goto done; + + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto done; sk->sk_max_ack_backlog = backlog; @@ -636,6 +692,30 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, return 0; } +/** + * iucv_send_iprm() - Send socket data in parameter list of an iucv message. + * @path: IUCV path + * @msg: Pointer to a struct iucv_message + * @skb: The socket data to send, skb->len MUST BE <= 7 + * + * Send the socket data in the parameter list in the iucv message + * (IUCV_IPRMDATA). The socket data is stored at index 0 to 6 in the parameter + * list and the socket data len at index 7 (last byte). + * See also iucv_msg_length(). + * + * Returns the error code from the iucv_message_send() call. + */ +static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, + struct sk_buff *skb) +{ + u8 prmdata[8]; + + memcpy(prmdata, (void *) skb->data, skb->len); + prmdata[7] = 0xff - (u8) skb->len; + return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, + (void *) prmdata, 8); +} + static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -643,6 +723,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; struct iucv_message txmsg; + struct cmsghdr *cmsg; + int cmsg_done; char user_id[9]; char appl_id[9]; int err; @@ -654,6 +736,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + /* SOCK_SEQPACKET: we do not support segmented records */ + if (sk->sk_type == SOCK_SEQPACKET && !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + lock_sock(sk); if (sk->sk_shutdown & SEND_SHUTDOWN) { @@ -662,6 +748,52 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } if (sk->sk_state == IUCV_CONNECTED) { + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; + + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } + + if (cmsg->cmsg_level != SOL_IUCV) + continue; + + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; + + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { + err = -EINVAL; + goto out; + } + + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + + break; + + default: + err = -EINVAL; + goto out; + break; + } + } + + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ if (!(skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, &err))) @@ -672,13 +804,33 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } - txmsg.class = 0; - memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len); + /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(skb->cb, &txmsg.tag, 4); + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); skb_queue_tail(&iucv->send_skb_q, skb); - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); + + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); + + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set... sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); + skb_unlink(skb, &iucv->send_skb_q); + err = -EPIPE; + goto fail; + } + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); if (err) { if (err == 3) { user_id[8] = 0; @@ -725,6 +877,10 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) if (!nskb) return -ENOMEM; + /* copy target class to control buffer of new skb */ + memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + + /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); copied += size; dataleft -= size; @@ -744,19 +900,33 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, struct iucv_message *msg) { int rc; + unsigned int len; + + len = iucv_msg_length(msg); + + /* store msg target class in the second 4 bytes of skb ctrl buffer */ + /* Note: the first 4 bytes are reserved for msg tag */ + memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); - if (msg->flags & IPRMDATA) { - skb->data = NULL; - skb->len = 0; + /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ + if ((msg->flags & IUCV_IPRMDATA) && len > 7) { + if (memcmp(msg->rmmsg, iprm_shutdown, 8) == 0) { + skb->data = NULL; + skb->len = 0; + } } else { - rc = iucv_message_receive(path, msg, 0, skb->data, - msg->length, NULL); + rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, + skb->data, len, NULL); if (rc) { kfree_skb(skb); return; } - if (skb->truesize >= sk->sk_rcvbuf / 4) { - rc = iucv_fragment_skb(sk, skb, msg->length); + /* we need to fragment iucv messages for SOCK_STREAM only; + * for SOCK_SEQPACKET, it is only relevant if we support + * record segmentation using MSG_EOR (see also recvmsg()) */ + if (sk->sk_type == SOCK_STREAM && + skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, len); kfree_skb(skb); skb = NULL; if (rc) { @@ -767,7 +937,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } else { skb_reset_transport_header(skb); skb_reset_network_header(skb); - skb->len = msg->length; + skb->len = len; } } @@ -782,7 +952,7 @@ static void iucv_process_message_q(struct sock *sk) struct sock_msg_q *p, *n; list_for_each_entry_safe(p, n, &iucv->message_q.list, list) { - skb = alloc_skb(p->msg.length, GFP_ATOMIC | GFP_DMA); + skb = alloc_skb(iucv_msg_length(&p->msg), GFP_ATOMIC | GFP_DMA); if (!skb) break; iucv_process_message(sk, skb, p->path, &p->msg); @@ -799,7 +969,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); - int target, copied = 0; + unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; @@ -823,25 +993,45 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - copied = min_t(unsigned int, skb->len, len); + rlen = skb->len; /* real length of skb */ + copied = min_t(unsigned int, rlen, len); cskb = skb; if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { - skb_queue_head(&sk->sk_receive_queue, skb); - if (copied == 0) - return -EFAULT; - goto done; + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return -EFAULT; } - len -= copied; + /* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */ + if (sk->sk_type == SOCK_SEQPACKET) { + if (copied < rlen) + msg->msg_flags |= MSG_TRUNC; + /* each iucv message contains a complete record */ + msg->msg_flags |= MSG_EOR; + } + + /* create control message to store iucv msg target class: + * get the trgcls from the control buffer of the skb due to + * fragmentation of original iucv message. */ + err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, + CB_TRGCLS_LEN, CB_TRGCLS(skb)); + if (err) { + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return err; + } /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); - goto done; + /* SOCK_STREAM: re-queue skb if it contains unreceived data */ + if (sk->sk_type == SOCK_STREAM) { + skb_pull(skb, copied); + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + goto done; + } } kfree_skb(skb); @@ -866,7 +1056,11 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } done: - return err ? : copied; + /* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */ + if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC)) + copied = rlen; + + return copied; } static inline unsigned int iucv_accept_poll(struct sock *parent) @@ -928,7 +1122,6 @@ static int iucv_sock_shutdown(struct socket *sock, int how) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_message txmsg; int err = 0; - u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; how++; @@ -953,7 +1146,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) txmsg.class = 0; txmsg.tag = 0; err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, - (void *) prmmsg, 8); + (void *) iprm_shutdown, 8); if (err) { switch (err) { case 1: @@ -1007,6 +1200,98 @@ static int iucv_sock_release(struct socket *sock) return err; } +/* getsockopt and setsockopt */ +static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val; + int rc; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + rc = 0; + + lock_sock(sk); + switch (optname) { + case SO_IPRMDATA_MSG: + if (val) + iucv->flags |= IUCV_IPRMDATA; + else + iucv->flags &= ~IUCV_IPRMDATA; + break; + case SO_MSGLIMIT: + switch (sk->sk_state) { + case IUCV_OPEN: + case IUCV_BOUND: + if (val < 1 || val > (u16)(~0)) + rc = -EINVAL; + else + iucv->msglimit = val; + break; + default: + rc = -EINVAL; + break; + } + break; + default: + rc = -ENOPROTOOPT; + break; + } + release_sock(sk); + + return rc; +} + +static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int val, len; + + if (level != SOL_IUCV) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < 0) + return -EINVAL; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case SO_IPRMDATA_MSG: + val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; + break; + case SO_MSGLIMIT: + lock_sock(sk); + val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ + : iucv->msglimit; /* default */ + release_sock(sk); + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + + /* Callback wrappers - called from iucv base support */ static int iucv_callback_connreq(struct iucv_path *path, u8 ipvmid[8], u8 ipuser[16]) @@ -1060,7 +1345,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); if (!nsk) { err = iucv_path_sever(path, user_data); iucv_path_free(path); @@ -1083,7 +1368,9 @@ static int iucv_callback_connreq(struct iucv_path *path, memcpy(nuser_data + 8, niucv->src_name, 8); ASCEBC(nuser_data + 8, 8); - path->msglim = IUCV_QUEUELEN_DEFAULT; + /* set message limit for path based on msglimit of accepting socket */ + niucv->msglimit = iucv->msglimit; + path->msglim = iucv->msglimit; err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); @@ -1131,11 +1418,11 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += msg->length + sizeof(struct sk_buff); + len += iucv_msg_length(msg) + sizeof(struct sk_buff); if (len > sk->sk_rcvbuf) goto save_message; - skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); + skb = alloc_skb(iucv_msg_length(msg), GFP_ATOMIC | GFP_DMA); if (!skb) goto save_message; @@ -1170,7 +1457,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, list_skb->cb, 4)) { + if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { this = list_skb; break; } @@ -1206,6 +1493,21 @@ static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } +/* called if the other communication side shuts down its RECV direction; + * in turn, the callback sets SEND_SHUTDOWN to disable sending of data. + */ +static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + bh_lock_sock(sk); + if (sk->sk_state != IUCV_CLOSED) { + sk->sk_shutdown |= SEND_SHUTDOWN; + sk->sk_state_change(sk); + } + bh_unlock_sock(sk); +} + static struct proto_ops iucv_sock_ops = { .family = PF_IUCV, .owner = THIS_MODULE, @@ -1222,8 +1524,8 @@ static struct proto_ops iucv_sock_ops = { .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = iucv_sock_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt + .setsockopt = iucv_sock_setsockopt, + .getsockopt = iucv_sock_getsockopt, }; static struct net_proto_family iucv_sock_family_ops = { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index a35240f61ec3..61e8038a55ee 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -280,6 +280,7 @@ union iucv_param { * Anchor for per-cpu IUCV command parameter block. */ static union iucv_param *iucv_param[NR_CPUS]; +static union iucv_param *iucv_param_irq[NR_CPUS]; /** * iucv_call_b2f0 @@ -358,7 +359,7 @@ static void iucv_allow_cpu(void *data) * 0x10 - Flag to allow priority message completion interrupts * 0x08 - Flag to allow IUCV control interrupts */ - parm = iucv_param[cpu]; + parm = iucv_param_irq[cpu]; memset(parm, 0, sizeof(union iucv_param)); parm->set_mask.ipmask = 0xf8; iucv_call_b2f0(IUCV_SETMASK, parm); @@ -379,7 +380,7 @@ static void iucv_block_cpu(void *data) union iucv_param *parm; /* Disable all iucv interrupts. */ - parm = iucv_param[cpu]; + parm = iucv_param_irq[cpu]; memset(parm, 0, sizeof(union iucv_param)); iucv_call_b2f0(IUCV_SETMASK, parm); @@ -403,7 +404,7 @@ static void iucv_declare_cpu(void *data) return; /* Declare interrupt buffer. */ - parm = iucv_param[cpu]; + parm = iucv_param_irq[cpu]; memset(parm, 0, sizeof(union iucv_param)); parm->db.ipbfadr1 = virt_to_phys(iucv_irq_data[cpu]); rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); @@ -460,7 +461,7 @@ static void iucv_retrieve_cpu(void *data) iucv_block_cpu(NULL); /* Retrieve interrupt buffer. */ - parm = iucv_param[cpu]; + parm = iucv_param_irq[cpu]; iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); /* Clear indication that an iucv buffer exists for this cpu. */ @@ -574,11 +575,22 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, iucv_irq_data[cpu] = NULL; return NOTIFY_BAD; } + iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param_irq[cpu]) { + kfree(iucv_param[cpu]); + iucv_param[cpu] = NULL; + kfree(iucv_irq_data[cpu]); + iucv_irq_data[cpu] = NULL; + return NOTIFY_BAD; + } break; case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: + kfree(iucv_param_irq[cpu]); + iucv_param_irq[cpu] = NULL; kfree(iucv_param[cpu]); iucv_param[cpu] = NULL; kfree(iucv_irq_data[cpu]); @@ -625,7 +637,7 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) { union iucv_param *parm; - parm = iucv_param[smp_processor_id()]; + parm = iucv_param_irq[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); if (userdata) memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); @@ -918,10 +930,8 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) if (iucv_active_cpu != smp_processor_id()) spin_lock_bh(&iucv_table_lock); rc = iucv_sever_pathid(path->pathid, userdata); - if (!rc) { - iucv_path_table[path->pathid] = NULL; - list_del_init(&path->list); - } + iucv_path_table[path->pathid] = NULL; + list_del_init(&path->list); if (iucv_active_cpu != smp_processor_id()) spin_unlock_bh(&iucv_table_lock); preempt_enable(); @@ -1378,6 +1388,8 @@ static void iucv_path_complete(struct iucv_irq_data *data) struct iucv_path_complete *ipc = (void *) data; struct iucv_path *path = iucv_path_table[ipc->ippathid]; + if (path) + path->flags = ipc->ipflags1; if (path && path->handler && path->handler->path_complete) path->handler->path_complete(path, ipc->ipuser); } @@ -1413,7 +1425,7 @@ static void iucv_path_severed(struct iucv_irq_data *data) else { iucv_sever_pathid(path->pathid, NULL); iucv_path_table[path->pathid] = NULL; - list_del_init(&path->list); + list_del(&path->list); iucv_path_free(path); } } @@ -1717,6 +1729,13 @@ static int __init iucv_init(void) rc = -ENOMEM; goto out_free; } + iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); + if (!iucv_param_irq[cpu]) { + rc = -ENOMEM; + goto out_free; + } + } rc = register_hotcpu_notifier(&iucv_cpu_notifier); if (rc) @@ -1734,6 +1753,8 @@ out_cpu: unregister_hotcpu_notifier(&iucv_cpu_notifier); out_free: for_each_possible_cpu(cpu) { + kfree(iucv_param_irq[cpu]); + iucv_param_irq[cpu] = NULL; kfree(iucv_param[cpu]); iucv_param[cpu] = NULL; kfree(iucv_irq_data[cpu]); @@ -1764,6 +1785,8 @@ static void __exit iucv_exit(void) spin_unlock_irq(&iucv_queue_lock); unregister_hotcpu_notifier(&iucv_cpu_notifier); for_each_possible_cpu(cpu) { + kfree(iucv_param_irq[cpu]); + iucv_param_irq[cpu] = NULL; kfree(iucv_param[cpu]); iucv_param[cpu] = NULL; kfree(iucv_irq_data[cpu]); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 20cf16fc572f..b11e7e527864 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -35,7 +35,6 @@ #include <linux/kernel.h> #include <linux/in.h> #include <linux/poll.h> -#include <linux/version.h> #include <net/sock.h> #include "rds.h" diff --git a/net/rds/connection.c b/net/rds/connection.c index 273f064930a8..d14445c48304 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -148,14 +148,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, if (conn) goto out; - conn = kmem_cache_alloc(rds_conn_slab, gfp); + conn = kmem_cache_zalloc(rds_conn_slab, gfp); if (conn == NULL) { conn = ERR_PTR(-ENOMEM); goto out; } - memset(conn, 0, sizeof(*conn)); - INIT_HLIST_NODE(&conn->c_hash_node); conn->c_version = RDS_PROTOCOL_3_0; conn->c_laddr = laddr; diff --git a/net/rds/ib.c b/net/rds/ib.c index 4933b380985e..b9bcd32431e1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -224,8 +224,8 @@ static int rds_ib_laddr_check(__be32 addr) * IB and iWARP capable NICs. */ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - if (!cm_id) - return -EADDRNOTAVAIL; + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; diff --git a/net/rds/ib.h b/net/rds/ib.h index 069206cae733..455ae73047fe 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,7 +333,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, - u32 *adv_credits, int need_posted); + u32 *adv_credits, int need_posted, int max_posted); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 36d931573ff4..5709bad28329 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -524,7 +524,7 @@ void rds_ib_attempt_ack(struct rds_ib_connection *ic) } /* Can we get a send credit? */ - if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) { + if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) { rds_ib_stats_inc(s_ib_tx_throttle); clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); return; diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c index 99a6ccae964c..ff97e8eda858 100644 --- a/net/rds/ib_ring.c +++ b/net/rds/ib_ring.c @@ -137,7 +137,7 @@ int rds_ib_ring_empty(struct rds_ib_work_ring *ring) int rds_ib_ring_low(struct rds_ib_work_ring *ring) { - return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2); + return __rds_ib_ring_used(ring) <= (ring->w_nr >> 1); } /* diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index cb6c52cb1c4c..23bf830db2d5 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -311,7 +311,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) * and using atomic_cmpxchg when updating the two counters. */ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, - u32 wanted, u32 *adv_credits, int need_posted) + u32 wanted, u32 *adv_credits, int need_posted, int max_posted) { unsigned int avail, posted, got = 0, advertise; long oldval, newval; @@ -351,7 +351,7 @@ try_again: * available. */ if (posted && (got || need_posted)) { - advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + advertise = min_t(unsigned int, posted, max_posted); newval -= IB_SET_POST_CREDITS(advertise); } @@ -498,7 +498,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, credit_alloc = work_alloc; if (ic->i_flowctl) { - credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0); + credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); adv_credits += posted; if (credit_alloc < work_alloc) { rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); @@ -506,7 +506,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, flow_controlled++; } if (work_alloc == 0) { - rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); rds_ib_stats_inc(s_ib_tx_throttle); ret = -ENOMEM; goto out; @@ -571,7 +571,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* * Update adv_credits since we reset the ACK_REQUIRED bit. */ - rds_ib_send_grab_credits(ic, 0, &posted, 1); + rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); } else if (ic->i_rm != rm) diff --git a/net/rds/info.c b/net/rds/info.c index 1d885535214d..62aeef37aefe 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -188,10 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, - pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_fast(start, nr_pages, 1, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; diff --git a/net/rds/iw.c b/net/rds/iw.c index b732efb5b634..d16e1cbc8e83 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -233,8 +233,8 @@ static int rds_iw_laddr_check(__be32 addr) * IB and iWARP capable NICs. */ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - if (!cm_id) - return -EADDRNOTAVAIL; + if (IS_ERR(cm_id)) + return PTR_ERR(cm_id); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; diff --git a/net/rds/iw.h b/net/rds/iw.h index b4fb27252895..0715dde323e7 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -361,7 +361,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, - u32 *adv_credits, int need_posted); + u32 *adv_credits, int need_posted, int max_posted); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats); diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index fde470fa50d5..8683f5f66c4b 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -524,7 +524,7 @@ void rds_iw_attempt_ack(struct rds_iw_connection *ic) } /* Can we get a send credit? */ - if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) { + if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) { rds_iw_stats_inc(s_iw_tx_throttle); clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); return; diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c index d422d4b5deef..da8e3b63f663 100644 --- a/net/rds/iw_ring.c +++ b/net/rds/iw_ring.c @@ -137,7 +137,7 @@ int rds_iw_ring_empty(struct rds_iw_work_ring *ring) int rds_iw_ring_low(struct rds_iw_work_ring *ring) { - return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2); + return __rds_iw_ring_used(ring) <= (ring->w_nr >> 1); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 22dd38ffd608..44a6a0551f28 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -347,7 +347,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * and using atomic_cmpxchg when updating the two counters. */ int rds_iw_send_grab_credits(struct rds_iw_connection *ic, - u32 wanted, u32 *adv_credits, int need_posted) + u32 wanted, u32 *adv_credits, int need_posted, int max_posted) { unsigned int avail, posted, got = 0, advertise; long oldval, newval; @@ -387,7 +387,7 @@ try_again: * available. */ if (posted && (got || need_posted)) { - advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + advertise = min_t(unsigned int, posted, max_posted); newval -= IB_SET_POST_CREDITS(advertise); } @@ -541,7 +541,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, credit_alloc = work_alloc; if (ic->i_flowctl) { - credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0); + credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); adv_credits += posted; if (credit_alloc < work_alloc) { rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); @@ -549,7 +549,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, flow_controlled++; } if (work_alloc == 0) { - rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); rds_iw_stats_inc(s_iw_tx_throttle); ret = -ENOMEM; goto out; @@ -614,7 +614,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, /* * Update adv_credits since we reset the ACK_REQUIRED bit. */ - rds_iw_send_grab_credits(ic, 0, &posted, 1); + rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); adv_credits += posted; BUG_ON(adv_credits > 255); } else if (ic->i_rm != rm) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index eaeeb91e1119..8dc83d2caa58 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -150,12 +150,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, { int ret; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, user_addr, - nr_pages, write, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_fast(user_addr, nr_pages, write, pages); - if (0 <= ret && (unsigned) ret < nr_pages) { + if (ret >= 0 && ret < nr_pages) { while (ret--) put_page(pages[ret]); ret = -EFAULT; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 7b19024f9706..7d0f901c93d5 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -34,7 +34,7 @@ #include "rdma_transport.h" -static struct rdma_cm_id *rds_iw_listen_id; +static struct rdma_cm_id *rds_rdma_listen_id; int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) @@ -161,7 +161,7 @@ static int __init rds_rdma_listen_init(void) rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); - rds_iw_listen_id = cm_id; + rds_rdma_listen_id = cm_id; cm_id = NULL; out: if (cm_id) @@ -171,10 +171,10 @@ out: static void rds_rdma_listen_stop(void) { - if (rds_iw_listen_id) { - rdsdebug("cm %p\n", rds_iw_listen_id); - rdma_destroy_id(rds_iw_listen_id); - rds_iw_listen_id = NULL; + if (rds_rdma_listen_id) { + rdsdebug("cm %p\n", rds_rdma_listen_id); + rdma_destroy_id(rds_rdma_listen_id); + rds_rdma_listen_id = NULL; } } diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e5..1f82ec0d2066 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -132,7 +132,7 @@ struct rds_connection { #define RDS_FLAG_CONG_BITMAP 0x01 #define RDS_FLAG_ACK_REQUIRED 0x02 #define RDS_FLAG_RETRANSMITTED 0x04 -#define RDS_MAX_ADV_CREDIT 127 +#define RDS_MAX_ADV_CREDIT 255 /* * Maximum space available for extension headers. diff --git a/net/rds/send.c b/net/rds/send.c index 104fe033203d..a4a7f428cd76 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -854,11 +854,6 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rm->m_daddr = daddr; - /* Parse any control messages the user may have included. */ - ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); - if (ret) - goto out; - /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) @@ -874,6 +869,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rs->rs_conn = conn; } + /* Parse any control messages the user may have included. */ + ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); + if (ret) + goto out; + if ((rm->m_rdma_cookie || rm->m_rdma_op) && conn->c_trans->xmit_rdma == NULL) { if (printk_ratelimit()) |