summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_route.c4
-rw-r--r--net/can/raw.c4
-rw-r--r--net/core/dev.c133
-rw-r--r--net/core/ethtool.c33
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/decnet/dn_nsp_out.c8
-rw-r--r--net/dns_resolver/Kconfig27
-rw-r--r--net/dns_resolver/Makefile7
-rw-r--r--net/dns_resolver/dns_key.c293
-rw-r--r--net/dns_resolver/dns_query.c165
-rw-r--r--net/dns_resolver/internal.h44
-rw-r--r--net/ipv4/Kconfig7
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/gre.c151
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c31
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c9
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/mac80211/rate.c2
-rw-r--r--net/netfilter/xt_hashlimit.c15
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rxrpc/ar-internal.h16
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_csum.c595
-rw-r--r--net/sched/cls_flow.c67
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_sfq.c33
-rw-r--r--net/socket.c9
-rw-r--r--net/sunrpc/auth.c162
-rw-r--r--net/sunrpc/auth_generic.c23
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c29
-rw-r--r--net/sunrpc/auth_null.c2
-rw-r--r--net/sunrpc/auth_unix.c21
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c182
-rw-r--r--net/sunrpc/sched.c104
-rw-r--r--net/sunrpc/sunrpc_syms.c16
-rw-r--r--net/sunrpc/xprt.c3
-rw-r--r--net/sunrpc/xprtsock.c26
-rw-r--r--net/tipc/bcast.c39
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/discover.c8
-rw-r--r--net/tipc/link.c25
-rw-r--r--net/tipc/name_table.c44
-rw-r--r--net/tipc/node.c22
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/port.c15
-rw-r--r--net/tipc/socket.c81
58 files changed, 1998 insertions, 538 deletions
diff --git a/net/Kconfig b/net/Kconfig
index e24fa0873f32..e330594d3709 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -213,6 +213,7 @@ source "net/phonet/Kconfig"
source "net/ieee802154/Kconfig"
source "net/sched/Kconfig"
source "net/dcb/Kconfig"
+source "net/dns_resolver/Kconfig"
config RPS
boolean
diff --git a/net/Makefile b/net/Makefile
index 41d420070a38..ea60fbce9b1b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -67,3 +67,4 @@ ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
endif
obj-$(CONFIG_WIMAX) += wimax/
+obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3d..1b9c52a02cd3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
default:
if (level == SOL_SOCKET)
return -EINVAL;
- break;
+ break;
}
if (!vcc->dev || !vcc->dev->ops->getsockopt)
return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac8..181d70c73d70 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
static int lec_open(struct net_device *dev)
{
netif_start_queue(dev);
- memset(&dev->stats, 0, sizeof(struct net_device_stats));
return 0;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a172..26eaebf4aaa9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
out:
release_sock(sk);
- return 0;
+ return err;
}
/*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd6..a1690845dc6e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
{
ax25_uid_assoc *user;
ax25_route *ax25_rt;
- int err;
+ int err = 0;
if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
put:
ax25_put_route(ax25_rt);
- return 0;
+ return err;
}
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f084..7d77e67e57af 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err < 0)
goto free_skb;
- err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+ err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (err < 0)
goto free_skb;
/* to be able to check the received tx sock reference in raw_rcv() */
- skb_tx(skb)->prevent_sk_orphan = 1;
+ skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
skb->dev = dev;
skb->sk = sk;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3721fbb9a83c..7cd5237d9822 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb)
/*
* Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
*/
static inline void skb_orphan_try(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- if (sk && !skb_tx(skb)->flags) {
+ if (sk && !skb_shinfo(skb)->tx_flags) {
/* skb_tx_hash() wont be able to get sk.
* We copy sk_hash into skb->rxhash
*/
@@ -2259,69 +2259,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
/*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
- struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
{
+ int nhoff, hash = 0, poff;
struct ipv6hdr *ip6;
struct iphdr *ip;
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
- struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
- int cpu = -1;
u8 ip_proto;
- u16 tcpu;
u32 addr1, addr2, ihl;
union {
u32 v32;
u16 v16[2];
} ports;
- if (skb_rx_queue_recorded(skb)) {
- u16 index = skb_get_rx_queue(skb);
- if (unlikely(index >= dev->num_rx_queues)) {
- WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
- "on queue %u, but number of RX queues is %u\n",
- dev->name, index, dev->num_rx_queues);
- goto done;
- }
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
-
- if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
- goto done;
-
- if (skb->rxhash)
- goto got_hash; /* Skip hash computation on packet header */
+ nhoff = skb_network_offset(skb);
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- if (!pskb_may_pull(skb, sizeof(*ip)))
+ if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
- ip = (struct iphdr *) skb->data;
- ip_proto = ip->protocol;
+ ip = (struct iphdr *) (skb->data + nhoff);
+ if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ ip_proto = 0;
+ else
+ ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
ihl = ip->ihl;
break;
case __constant_htons(ETH_P_IPV6):
- if (!pskb_may_pull(skb, sizeof(*ip6)))
+ if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
- ip6 = (struct ipv6hdr *) skb->data;
+ ip6 = (struct ipv6hdr *) (skb->data + nhoff);
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2305,71 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
default:
goto done;
}
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- if (pskb_may_pull(skb, (ihl * 4) + 4)) {
- ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+ ports.v32 = 0;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += ihl * 4 + poff;
+ if (pskb_may_pull(skb, nhoff + 4)) {
+ ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
- break;
}
- default:
- ports.v32 = 0;
- break;
}
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
- skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
- if (!skb->rxhash)
- skb->rxhash = 1;
-got_hash:
+ hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ if (!hash)
+ hash = 1;
+
+done:
+ return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
+{
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
+ int cpu = -1;
+ u16 tcpu;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->num_rx_queues)) {
+ WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+ "on queue %u, but number of RX queues is %u\n",
+ dev->name, index, dev->num_rx_queues);
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx;
+
+ if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+ goto done;
+
+ skb_reset_network_header(skb);
+ if (!skb_get_rxhash(skb))
+ goto done;
+
flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7a85367b3c2f..d2c4da5a6a4f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -205,18 +205,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
- if (!ops->get_drvinfo)
- return -EOPNOTSUPP;
-
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
- ops->get_drvinfo(dev, &info);
+ if (ops && ops->get_drvinfo) {
+ ops->get_drvinfo(dev, &info);
+ } else if (dev->dev.parent && dev->dev.parent->driver) {
+ strlcpy(info.bus_info, dev_name(dev->dev.parent),
+ sizeof(info.bus_info));
+ strlcpy(info.driver, dev->dev.parent->driver->name,
+ sizeof(info.driver));
+ } else {
+ return -EOPNOTSUPP;
+ }
/*
* this method of obtaining string set info is deprecated;
* Use ETHTOOL_GSSET_INFO instead.
*/
- if (ops->get_sset_count) {
+ if (ops && ops->get_sset_count) {
int rc;
rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +235,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops->get_regs_len)
+ if (ops && ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
- if (ops->get_eeprom_len)
+ if (ops && ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -1402,12 +1408,19 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (!dev || !netif_device_present(dev))
return -ENODEV;
- if (!dev->ethtool_ops)
- return -EOPNOTSUPP;
-
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
+ if (!dev->ethtool_ops) {
+ /* ETHTOOL_GDRVINFO does not require any driver support.
+ * It is also unprivileged and does not change anything,
+ * so we can take a shortcut to it. */
+ if (ethcmd == ETHTOOL_GDRVINFO)
+ return ethtool_get_drvinfo(dev, useraddr);
+ else
+ return -EOPNOTSUPP;
+ }
+
/* Allow some commands to be done by anyone */
switch (ethcmd) {
case ETHTOOL_GDRVINFO:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3a2513f0d0c3..99ef721f773d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3016,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
/*
* no hardware time stamps available,
- * so keep the skb_shared_tx and only
+ * so keep the shared tx_flags and only
* store software time stamp
*/
skb->tstamp = ktime_get_real();
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011b..2ef115277bea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
aux = scp->accessdata.acc_userl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
aux = scp->accessdata.acc_passl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
aux = scp->accessdata.acc_accl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux);
+ memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
scp->persist = dn_nsp_persist(sk);
scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
new file mode 100644
index 000000000000..50d49f7e0472
--- /dev/null
+++ b/net/dns_resolver/Kconfig
@@ -0,0 +1,27 @@
+#
+# Configuration for DNS Resolver
+#
+config DNS_RESOLVER
+ tristate "DNS Resolver support"
+ depends on NET && KEYS
+ help
+ Saying Y here will include support for the DNS Resolver key type
+ which can be used to make upcalls to perform DNS lookups in
+ userspace.
+
+ DNS Resolver is used to query a DNS server for information. Examples
+ being resolving a UNC hostname element to an IP address for CIFS or
+ performing a DNS query for AFSDB records so that AFS can locate a
+ cell's volume location database servers.
+
+ DNS Resolver is used by the CIFS and AFS modules, and would support
+ SMB2 later. DNS Resolver is supported by the userspace upcall
+ helper "/sbin/dns.resolver" via /etc/request-key.conf.
+
+ See <file:Documentation/networking/dns_resolver.txt> for further
+ information.
+
+ To compile this as a module, choose M here: the module will be called
+ dns_resolver.
+
+ If unsure, say N.
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
new file mode 100644
index 000000000000..c0ef4e71dc49
--- /dev/null
+++ b/net/dns_resolver/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux DNS Resolver.
+#
+
+obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
+
+dns_resolver-objs := dns_key.o dns_query.o
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
new file mode 100644
index 000000000000..739435a6af39
--- /dev/null
+++ b/net/dns_resolver/dns_key.c
@@ -0,0 +1,293 @@
+/* Key type used to cache DNS lookups made by the kernel
+ *
+ * See Documentation/networking/dns_resolver.txt
+ *
+ * Copyright (c) 2007 Igor Mammedov
+ * Author(s): Igor Mammedov (niallain@gmail.com)
+ * Steve French (sfrench@us.ibm.com)
+ * Wang Lei (wang840925@gmail.com)
+ * David Howells (dhowells@redhat.com)
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/keyctl.h>
+#include <linux/err.h>
+#include <linux/seq_file.h>
+#include <keys/dns_resolver-type.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+MODULE_DESCRIPTION("DNS Resolver");
+MODULE_AUTHOR("Wang Lei");
+MODULE_LICENSE("GPL");
+
+unsigned dns_resolver_debug;
+module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
+
+const struct cred *dns_resolver_cache;
+
+#define DNS_ERRORNO_OPTION "dnserror"
+
+/*
+ * Instantiate a user defined key for dns_resolver.
+ *
+ * The data must be a NUL-terminated string, with the NUL char accounted in
+ * datalen; anything else is rejected with -EINVAL.
+ *
+ * If the data contains any '#' characters, then we take the clause after each
+ * one to be an option of the form 'key=value'. The actual data of interest is
+ * the string leading up to the first '#'. For instance:
+ *
+ * "ip1,ip2,...#foo=bar"
+ */
+static int
+dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
+{
+ struct user_key_payload *upayload;
+ unsigned long derrno;
+ int ret;
+ size_t result_len = 0;
+ const char *data = _data, *end, *opt;
+
+ kenter("%%%d,%s,'%s',%zu",
+ key->serial, key->description, data, datalen);
+
+ if (datalen <= 1 || !data || data[datalen - 1] != '\0')
+ return -EINVAL;
+ datalen--; /* from here on datalen excludes the trailing NUL */
+
+ /* deal with any options embedded in the data */
+ end = data + datalen;
+ opt = memchr(data, '#', datalen);
+ if (!opt) {
+ /* no options: the entire data is the result */
+ kdebug("no options");
+ result_len = datalen;
+ } else {
+ const char *next_opt;
+
+ result_len = opt - data; /* the result is everything before the first '#' */
+ opt++;
+ kdebug("options: '%s'", opt);
+ do {
+ const char *eq;
+ int opt_len, opt_nlen, opt_vlen, tmp;
+
+ next_opt = memchr(opt, '#', end - opt) ?: end;
+ opt_len = next_opt - opt;
+ if (!opt_len) {
+ printk(KERN_WARNING
+ "Empty option to dns_resolver key %d\n",
+ key->serial);
+ return -EINVAL;
+ }
+
+ eq = memchr(opt, '=', opt_len) ?: end; /* no '=': falls back to the end of the data, not next_opt */
+ opt_nlen = eq - opt;
+ eq++;
+ opt_vlen = next_opt - eq; /* negative if there is no '=' (-1 for the last option) */
+
+ tmp = opt_vlen >= 0 ? opt_vlen : 0;
+ kdebug("option '%*.*s' val '%*.*s'",
+ opt_nlen, opt_nlen, opt, tmp, tmp, eq);
+
+ /* see if it's an error number representing a DNS error
+ * that's to be recorded as the result in this key */
+ if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
+ memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
+ kdebug("dns error number option");
+ if (opt_vlen <= 0)
+ goto bad_option_value;
+
+ ret = strict_strtoul(eq, 10, &derrno); /* NOTE(review): eq is only terminated by the NUL ending the data, so a following '#' option is presumably parsed too - confirm */
+ if (ret < 0)
+ goto bad_option_value;
+
+ if (derrno < 1 || derrno > 511)
+ goto bad_option_value;
+
+ kdebug("dns error no. = %lu", derrno);
+ key->type_data.x[0] = -derrno; /* recorded negated */
+ continue;
+ }
+
+ bad_option_value: /* also reached by fall-through for any option other than DNS_ERRORNO_OPTION */
+ printk(KERN_WARNING
+ "Option '%*.*s' to dns_resolver key %d:"
+ " bad/missing value\n",
+ opt_nlen, opt_nlen, opt, key->serial);
+ return -EINVAL;
+ } while (opt = next_opt + 1, opt < end);
+ }
+
+ /* don't cache the result if we're caching an error saying there's no
+ * result */
+ if (key->type_data.x[0]) {
+ kleave(" = 0 [h_error %ld]", key->type_data.x[0]);
+ return 0;
+ }
+
+ kdebug("store result");
+ ret = key_payload_reserve(key, result_len);
+ if (ret < 0)
+ return -EINVAL; /* the error code in ret is not passed on */
+
+ upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL);
+ if (!upayload) {
+ kleave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ upayload->datalen = result_len;
+ memcpy(upayload->data, data, result_len);
+ upayload->data[result_len] = '\0'; /* payload is stored NUL-terminated; datalen excludes the NUL */
+ rcu_assign_pointer(key->payload.data, upayload);
+
+ kleave(" = 0");
+ return 0;
+}
+
+/*
+ * Match a key against a description of the form "[<type>:]<domain_name>".
+ * Returns 1 if the key's description matches and 0 if it does not.
+ * The domain name may be a simple name or an absolute domain name (which
+ * should end with a period). The domain name is case-independent.
+ */
+static int
+dns_resolver_match(const struct key *key, const void *description)
+{
+ int slen, dlen, ret = 0;
+ const char *src = key->description, *dsp = description;
+
+ kenter("%s,%s", src, dsp);
+
+ if (!src || !dsp)
+ goto no_match;
+
+ if (strcasecmp(src, dsp) == 0) /* identical apart from case */
+ goto matched;
+
+ slen = strlen(src);
+ dlen = strlen(dsp);
+ if (slen <= 0 || dlen <= 0) /* keeps the [len - 1] indexing below in bounds */
+ goto no_match;
+ if (src[slen - 1] == '.') /* ignore one trailing period on either side */
+ slen--;
+ if (dsp[dlen - 1] == '.')
+ dlen--;
+ if (slen != dlen || strncasecmp(src, dsp, slen) != 0)
+ goto no_match;
+
+matched:
+ ret = 1;
+no_match:
+ kleave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Describe a DNS key: print its description, then the recorded error or, if none, the data length
+ */
+static void dns_resolver_describe(const struct key *key, struct seq_file *m)
+{
+ int err = key->type_data.x[0]; /* non-zero when dns_resolver_instantiate() recorded a dnserror option */
+
+ seq_puts(m, key->description);
+ if (err)
+ seq_printf(m, ": %d", err);
+ else
+ seq_printf(m, ": %u", key->datalen);
+}
+
+struct key_type key_type_dns_resolver = { /* key type registered by init_dns_resolver() and requested by dns_query() */
+ .name = "dns_resolver",
+ .instantiate = dns_resolver_instantiate,
+ .match = dns_resolver_match,
+ .revoke = user_revoke, /* user_* handlers are not defined in this file; presumably from keys/user-type.h - confirm */
+ .destroy = user_destroy,
+ .describe = dns_resolver_describe,
+ .read = user_read,
+};
+
+static int __init init_dns_resolver(void) /* module init: set up the credentials and keyring behind dns_resolver_cache, then register the key type */
+{
+ struct cred *cred;
+ struct key *keyring;
+ int ret;
+
+ printk(KERN_NOTICE "Registering the %s key type\n",
+ key_type_dns_resolver.name);
+
+ /* create an override credential set with a special thread keyring in
+ * which DNS requests are cached
+ *
+ * this is used to prevent malicious redirections from being installed
+ * with add_key().
+ */
+ cred = prepare_kernel_cred(NULL);
+ if (!cred)
+ return -ENOMEM;
+
+ keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(keyring)) {
+ ret = PTR_ERR(keyring);
+ goto failed_put_cred;
+ }
+
+ ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+ if (ret < 0)
+ goto failed_put_key;
+
+ ret = register_key_type(&key_type_dns_resolver);
+ if (ret < 0)
+ goto failed_put_key;
+
+ /* instruct request_key() to use this special keyring as a cache for
+ * the results it looks up */
+ cred->thread_keyring = keyring;
+ cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+ dns_resolver_cache = cred; /* dns_query() passes this to override_creds() */
+
+ kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); /* kdebug() appends its own "\n" as well */
+ return 0;
+
+failed_put_key: /* error unwinding: drop the keyring, then the credentials */
+ key_put(keyring);
+failed_put_cred:
+ put_cred(cred);
+ return ret;
+}
+
+static void __exit exit_dns_resolver(void) /* module exit: release what init_dns_resolver() set up */
+{
+ key_revoke(dns_resolver_cache->thread_keyring); /* the keyring installed in the cred by init_dns_resolver() */
+ unregister_key_type(&key_type_dns_resolver);
+ put_cred(dns_resolver_cache);
+ printk(KERN_NOTICE "Unregistered %s key type\n",
+ key_type_dns_resolver.name);
+}
+
+module_init(init_dns_resolver)
+module_exit(exit_dns_resolver)
+MODULE_LICENSE("GPL"); /* NOTE(review): MODULE_LICENSE("GPL") is also declared near the top of this file */
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
new file mode 100644
index 000000000000..c32be292c7e3
--- /dev/null
+++ b/net/dns_resolver/dns_query.c
@@ -0,0 +1,165 @@
+/* Upcall routine, designed to work as a key type and working through
+ * /sbin/request-key to contact userspace when handling DNS queries.
+ *
+ * See Documentation/networking/dns_resolver.txt
+ *
+ * Copyright (c) 2007 Igor Mammedov
+ * Author(s): Igor Mammedov (niallain@gmail.com)
+ * Steve French (sfrench@us.ibm.com)
+ * Wang Lei (wang840925@gmail.com)
+ * David Howells (dhowells@redhat.com)
+ *
+ * The upcall wrapper used to make an arbitrary DNS query.
+ *
+ * This function requires the appropriate userspace tool dns.upcall to be
+ * installed and something like the following lines should be added to the
+ * /etc/request-key.conf file:
+ *
+ * create dns_resolver * * /sbin/dns.upcall %k
+ *
+ * For example to use this module to query AFSDB RR:
+ *
+ * create dns_resolver afsdb:* * /sbin/dns.afsdb %k
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dns_resolver.h>
+#include <linux/err.h>
+#include <keys/dns_resolver-type.h>
+#include <keys/user-type.h>
+
+#include "internal.h"
+
+/**
+ * dns_query - Query the DNS
+ * @type: Query type (or NULL for straight host->IP lookup)
+ * @name: Name to look up
+ * @namelen: Length of name (lengths below 3 are rejected with -EINVAL)
+ * @options: Request options (or NULL if no options)
+ * @_result: Where to place the returned data.
+ * @_expiry: Where to store the result expiry time (or NULL)
+ *
+ * The data will be returned in the pointer at *_result, and the caller is
+ * responsible for freeing it.
+ *
+ * The description should be of the form "[<query_type>:]<domain_name>", and
+ * the options need to be appropriate for the query type requested. If no
+ * query_type is given, then the query is a straight hostname to IP address
+ * lookup.
+ *
+ * The DNS resolution lookup is performed by upcalling to userspace by way of
+ * requesting a key of type dns_resolver.
+ *
+ * Returns the size of the result on success, -ve error code otherwise (this includes a DNS error recorded in the key).
+ */
+int dns_query(const char *type, const char *name, size_t namelen,
+ const char *options, char **_result, time_t *_expiry)
+{
+ struct key *rkey;
+ struct user_key_payload *upayload;
+ const struct cred *saved_cred;
+ size_t typelen, desclen;
+ char *desc, *cp;
+ int ret, len;
+
+ kenter("%s,%*.*s,%zu,%s",
+ type, (int)namelen, (int)namelen, name, namelen, options);
+
+ if (!name || namelen == 0 || !_result)
+ return -EINVAL;
+
+ /* construct the query key description as "[<type>:]<name>" */
+ typelen = 0;
+ desclen = 0;
+ if (type) {
+ typelen = strlen(type);
+ if (typelen < 1)
+ return -EINVAL;
+ desclen += typelen + 1; /* + 1 for the ':' separator */
+ }
+
+ if (!namelen) /* never true here: namelen == 0 was already rejected above */
+ namelen = strlen(name);
+ if (namelen < 3)
+ return -EINVAL;
+ desclen += namelen + 1; /* + 1 for the terminating NUL */
+
+ desc = kmalloc(desclen, GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ cp = desc;
+ if (type) {
+ memcpy(cp, type, typelen);
+ cp += typelen;
+ *cp++ = ':';
+ }
+ memcpy(cp, name, namelen);
+ cp += namelen;
+ *cp = '\0';
+
+ if (!options)
+ options = "";
+ kdebug("call request_key(,%s,%s)", desc, options);
+
+ /* make the upcall, using special credentials to prevent the use of
+ * add_key() to preinstall malicious redirections
+ */
+ saved_cred = override_creds(dns_resolver_cache);
+ rkey = request_key(&key_type_dns_resolver, desc, options);
+ revert_creds(saved_cred);
+ kfree(desc);
+ if (IS_ERR(rkey)) {
+ ret = PTR_ERR(rkey);
+ goto out;
+ }
+
+ down_read(&rkey->sem);
+ rkey->perm |= KEY_USR_VIEW; /* NOTE(review): permission bits are modified while holding only the read side of rkey->sem - confirm this is intended */
+
+ ret = key_validate(rkey);
+ if (ret < 0)
+ goto put;
+
+ /* If the DNS server gave an error, return that to the caller */
+ ret = rkey->type_data.x[0]; /* dns_resolver_instantiate() stores -derrno here */
+ if (ret)
+ goto put;
+
+ upayload = rcu_dereference_protected(rkey->payload.data,
+ lockdep_is_held(&rkey->sem));
+ len = upayload->datalen;
+
+ ret = -ENOMEM;
+ *_result = kmalloc(len + 1, GFP_KERNEL);
+ if (!*_result)
+ goto put;
+
+ memcpy(*_result, upayload->data, len + 1); /* len + 1 also copies the NUL stored after the payload data */
+ if (_expiry)
+ *_expiry = rkey->expiry;
+
+ ret = len;
+put:
+ up_read(&rkey->sem);
+ key_put(rkey);
+out:
+ kleave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(dns_query);
diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h
new file mode 100644
index 000000000000..189ca9e9b785
--- /dev/null
+++ b/net/dns_resolver/internal.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 Wang Lei
+ * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved.
+ *
+ * Internal DNS Resolver stuff
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+/*
+ * dns_key.c
+ */
+extern const struct cred *dns_resolver_cache;
+
+/*
+ * debug tracing: kdebug() prints only when dns_resolver_debug is non-zero and appends "\n" itself
+ */
+extern unsigned dns_resolver_debug;
+
+#define kdebug(FMT, ...) \
+do { \
+ if (unlikely(dns_resolver_debug)) \
+ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \
+ current->comm, ##__VA_ARGS__); \
+} while (0)
+
+#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7c3a7d191249..7458bdae7e9f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,8 +215,15 @@ config NET_IPIP
be inserted in and removed from the running kernel whenever you
want). Most people won't need this and can say N.
+config NET_IPGRE_DEMUX
+ tristate "IP: GRE demultiplexer"
+ help
+ This is a helper module to demultiplex GRE packets based on the GRE version field.
+ Required by ip_gre and pptp modules.
+
config NET_IPGRE
tristate "IP: GRE tunnels over IP"
+ depends on NET_IPGRE_DEMUX
help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43aa..4978d22f9a75 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_NET_IPIP) += ipip.o
+obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9f..f581f77d1097 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
/*
* inet_ehash_secret must be set exactly once
- * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
*/
void build_ehash_secret(void)
{
u32 rnd;
+
do {
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- spin_lock_bh(&inetsw_lock);
- if (!inet_ehash_secret)
- inet_ehash_secret = rnd;
- spin_unlock_bh(&inetsw_lock);
+
+ cmpxchg(&inet_ehash_secret, 0, rnd);
}
EXPORT_SYMBOL(build_ehash_secret);
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 000000000000..b546736da2e1
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
+/*
+ * GRE over IPv4 demultiplexer driver
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <linux/spinlock.h>
+#include <net/protocol.h>
+#include <net/gre.h>
+
+
+const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
+static DEFINE_SPINLOCK(gre_proto_lock);
+
+/*
+ * Install @proto as the GRE handler for @version.
+ *
+ * Returns 0 on success, or -1 when @version is out of range or the slot
+ * for that version is already occupied.
+ */
+int gre_add_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int ret = -1;
+
+	if (version >= GREPROTO_MAX)
+		return ret;
+
+	/* Updates of gre_proto[] are made under gre_proto_lock. */
+	spin_lock(&gre_proto_lock);
+	if (!gre_proto[version]) {
+		rcu_assign_pointer(gre_proto[version], proto);
+		ret = 0;
+	}
+	spin_unlock(&gre_proto_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gre_add_protocol);
+
+/*
+ * Remove @proto from the slot for @version.
+ *
+ * Returns 0 on success, or -1 when @version is out of range or the slot
+ * does not currently hold @proto.  On success the function waits for an
+ * RCU grace period before returning.
+ */
+int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int ret = -1;
+
+	if (version >= GREPROTO_MAX)
+		return ret;
+
+	spin_lock(&gre_proto_lock);
+	if (gre_proto[version] == proto) {
+		rcu_assign_pointer(gre_proto[version], NULL);
+		ret = 0;
+	}
+	spin_unlock(&gre_proto_lock);
+
+	if (ret)
+		return ret;
+
+	/* Only reached after the slot was cleared. */
+	synchronize_rcu();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gre_del_protocol);
+
+/*
+ * Receive handler for IPPROTO_GRE: look up the handler registered for the
+ * version found in the second header byte and pass the skb to it.
+ *
+ * Returns the handler's result.  When no handler can be selected the skb
+ * is freed here and NET_RX_DROP is returned.
+ */
+static int gre_rcv(struct sk_buff *skb)
+{
+	const struct gre_protocol *proto;
+	int ret;
+	u8 ver;
+
+	if (!pskb_may_pull(skb, 12))
+		goto drop;
+
+	ver = skb->data[1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		goto drop;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (proto && proto->handler) {
+		ret = proto->handler(skb);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/*
+ * Error handler for IPPROTO_GRE: select the registered GRE handler by
+ * version and forward the error notification to it.
+ *
+ * @skb:  buffer describing the packet that triggered the error
+ * @info: error information passed through unchanged to the handler
+ *
+ * On this path skb->data is treated as the IP header of the offending
+ * packet, so the GRE header is located ihl * 4 bytes further on; reading
+ * skb->data[1] directly would pick up a byte of the IP header instead of
+ * the GRE version.
+ *
+ * The skb is never freed here.
+ * NOTE(review): this assumes the caller of the err_handler keeps ownership
+ * of the skb and releases it itself - confirm against net/ipv4/icmp.c.
+ */
+static void gre_err(struct sk_buff *skb, u32 info)
+{
+	const struct gre_protocol *proto;
+	unsigned int ihl;
+	u8 ver;
+
+	if (skb_headlen(skb) < 1)
+		return;
+
+	/* Low nibble of the first IP header byte is the header length in words. */
+	ihl = (skb->data[0] & 0x0f) << 2;
+
+	/* Non-mutating bounds check: the version byte must be in the linear area. */
+	if (skb_headlen(skb) < ihl + 2)
+		return;
+
+	ver = skb->data[ihl + 1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		return;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (proto && proto->err_handler)
+		proto->err_handler(skb, info);
+	rcu_read_unlock();
+}
+
+/* Receive and error callbacks that gre_init() registers for IPPROTO_GRE. */
+static const struct net_protocol net_gre_protocol = {
+ .handler = gre_rcv,
+ .err_handler = gre_err,
+ .netns_ok = 1,
+};
+
+/*
+ * Module init: register the demultiplexer for IPPROTO_GRE.
+ *
+ * Returns 0 on success or -EAGAIN when the protocol slot cannot be taken.
+ */
+static int __init gre_init(void)
+{
+	/* Terminate the message so it is not merged with the next log line. */
+	pr_info("GRE over IPv4 demultiplexor driver\n");
+
+	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
+		pr_err("gre: can't add protocol\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* Module exit: undo the IPPROTO_GRE registration made by gre_init(). */
+static void __exit gre_exit(void)
+{
+ inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+}
+
+module_init(gre_init);
+module_exit(gre_exit);
+
+MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_LICENSE("GPL");
+
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba5..96bc7f9475a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
inet->tos = ip_hdr(skb)->tos;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
if (icmp_param->replyopts.optlen) {
ipc.opt = &icmp_param->replyopts;
if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
{
struct flowi fl = {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 945b20a5ad50..85176895495a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/gre.h>
#ifdef CONFIG_IPV6
#include <net/ipv6.h>
@@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
}
-static const struct net_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
- .netns_ok = 1,
+static const struct gre_protocol ipgre_protocol = {
+ .handler = ipgre_rcv,
+ .err_handler = ipgre_err,
};
static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1663,7 +1663,7 @@ static int __init ipgre_init(void)
if (err < 0)
return err;
- err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
+ err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
printk(KERN_INFO "ipgre init: can't add protocol\n");
goto add_proto_failed;
@@ -1683,7 +1683,7 @@ out:
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
unregister_pernet_device(&ipgre_net_ops);
goto out;
@@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
+ if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
printk(KERN_INFO "ipgre close: can't remove protocol\n");
unregister_pernet_device(&ipgre_net_ops);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04b69896df5f..e807492f1777 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -953,7 +953,7 @@ alloc_new_skb:
else
/* only the initial fragment is
time stamped */
- ipc->shtx.flags = 0;
+ ipc->tx_flags = 0;
}
if (skb == NULL)
goto error;
@@ -964,7 +964,7 @@ alloc_new_skb:
skb->ip_summed = csummode;
skb->csum = 0;
skb_reserve(skb, hh_len);
- *skb_tx(skb) = ipc->shtx;
+ skb_shinfo(skb)->tx_flags = ipc->tx_flags;
/*
* Find where to start putting bytes.
@@ -1384,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
if (replyopts.opt.optlen) {
ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db87..1e26a4897655 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/net_namespace.h>
#include <net/checksum.h>
+#include <net/ip.h>
#define CLUSTERIP_VERSION "0.8"
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
- u_int16_t sport, dport;
- const u_int16_t *ports;
-
- switch (iph->protocol) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- case IPPROTO_ICMP:
- ports = (const void *)iph+iph->ihl*4;
- sport = ports[0];
- dport = ports[1];
- break;
- default:
+ u_int16_t sport = 0, dport = 0;
+ int poff;
+
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0) {
+ const u_int16_t *ports;
+ u16 _ports[2];
+
+ ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+ if (ports) {
+ sport = ports[0];
+ dport = ports[1];
+ }
+ } else {
if (net_ratelimit())
pr_info("unknown protocol %u\n", iph->protocol);
- sport = dport = 0;
}
switch (config->hash_mode) {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1ef..1f85ef289895 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->inet_saddr;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3f56b6e6c6aa..85a67c9d5982 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1268,18 +1268,11 @@ skip_hashing:
void rt_bind_peer(struct rtable *rt, int create)
{
- static DEFINE_SPINLOCK(rt_peer_lock);
struct inet_peer *peer;
peer = inet_getpeer(rt->rt_dst, create);
- spin_lock_bh(&rt_peer_lock);
- if (rt->peer == NULL) {
- rt->peer = peer;
- peer = NULL;
- }
- spin_unlock_bh(&rt_peer_lock);
- if (peer)
+ if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
inet_putpeer(peer);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32e0bef60d0a..86e757e162ee 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return -EOPNOTSUPP;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
if (up->pending) {
/*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
- err = sock_tx_timestamp(msg, sk, &ipc.shtx);
+ err = sock_tx_timestamp(sk, &ipc.tx_flags);
if (err)
return err;
if (msg->msg_controllen) {
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 6d0bd198af19..be04d46110fe 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -103,6 +103,7 @@ ieee80211_rate_control_ops_get(const char *name)
struct rate_control_ops *ops;
const char *alg_name;
+ kparam_block_sysfs_write(ieee80211_default_rc_algo);
if (!name)
alg_name = ieee80211_default_rc_algo;
else
@@ -120,6 +121,7 @@ ieee80211_rate_control_ops_get(const char *name)
/* try built-in one if specific alg requested but not found */
if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
+ kparam_unblock_sysfs_write(ieee80211_default_rc_algo);
return ops;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896d..9228ee0dc11a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
{
__be16 _ports[2], *ports;
u8 nexthdr;
+ int poff;
memset(dst, 0, sizeof(*dst));
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
return 0;
}
- switch (nexthdr) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- ports = skb_header_pointer(skb, protoff, sizeof(_ports),
+ poff = proto_ports_offset(nexthdr);
+ if (poff >= 0) {
+ ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
&_ports);
- break;
- default:
+ } else {
_ports[0] = _ports[1] = 0;
ports = _ports;
- break;
}
if (!ports)
return -1;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..3616f27b9d46 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+ err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (err < 0)
goto out_unlock;
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
if (err)
goto out_free;
- err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+ err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (err < 0)
goto out_free;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7043b294bb67..8e22bd345e71 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -597,12 +597,6 @@ extern unsigned rxrpc_debug;
#define dbgprintk(FMT,...) \
printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
-/* make sure we maintain the format strings, even when debugging is disabled */
-static inline __attribute__((format(printf,1,2)))
-void _dbprintk(const char *fmt, ...)
-{
-}
-
#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
@@ -655,11 +649,11 @@ do { \
} while (0)
#else
-#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
-#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
-#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
-#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__)
-#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__)
+#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
+#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__)
+#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__)
#endif
/*
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..522d5a9a2825 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
To compile this code as a module, choose M here: the
module will be called act_skbedit.
+config NET_ACT_CSUM
+ tristate "Checksum Updating"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to update some common checksums after direct
+ packet alterations.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_csum.
+
config NET_CLS_IND
bool "Incoming device classification"
depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..960f5dba6304 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
+obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..58d7f36949da
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
+/*
+ * Checksum updating actions
+ *
+ * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <linux/skbuff.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/igmp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#include <net/act_api.h>
+
+#include <linux/tc_act/tc_csum.h>
+#include <net/tc_act/tc_csum.h>
+
+#define CSUM_TAB_MASK 15
+static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
+static u32 csum_idx_gen;
+static DEFINE_RWLOCK(csum_lock);
+
+/*
+ * Hash bookkeeping for csum actions: bundles the tcf_csum_ht table, its
+ * mask and csum_lock so they can be handed to the tcf_hash_* helpers.
+ */
+static struct tcf_hashinfo csum_hash_info = {
+ .htab = tcf_csum_ht,
+ .hmask = CSUM_TAB_MASK,
+ .lock = &csum_lock,
+};
+
+/* Netlink policy used by tcf_csum_init(): TCA_CSUM_PARMS is sized as a struct tc_csum. */
+static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
+ [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
+};
+
+/*
+ * Create a csum action, or update an existing one, from netlink attributes.
+ *
+ * Returns ACT_P_CREATED when a new entry was created and inserted, 0 when
+ * an existing entry was updated, -EEXIST when the entry exists and @ovr is
+ * not set, -EINVAL when @nla or TCA_CSUM_PARMS is missing, or the error
+ * reported by attribute parsing / entry creation.
+ */
+static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct nlattr *attrs[TCA_CSUM_MAX + 1];
+	struct tcf_common *common;
+	struct tcf_csum *csum;
+	struct tc_csum *cfg;
+	int created = 0;
+	int rc;
+
+	if (!nla)
+		return -EINVAL;
+
+	rc = nla_parse_nested(attrs, TCA_CSUM_MAX, nla, csum_policy);
+	if (rc < 0)
+		return rc;
+
+	if (!attrs[TCA_CSUM_PARMS])
+		return -EINVAL;
+	cfg = nla_data(attrs[TCA_CSUM_PARMS]);
+
+	common = tcf_hash_check(cfg->index, a, bind, &csum_hash_info);
+	if (common) {
+		/* Existing entry: only touch it when overriding is allowed. */
+		if (!ovr) {
+			tcf_hash_release(common, bind, &csum_hash_info);
+			return -EEXIST;
+		}
+	} else {
+		common = tcf_hash_create(cfg->index, est, a, sizeof(*csum),
+					 bind, &csum_idx_gen, &csum_hash_info);
+		if (IS_ERR(common))
+			return PTR_ERR(common);
+		created = 1;
+	}
+	csum = to_tcf_csum(common);
+
+	spin_lock_bh(&csum->tcf_lock);
+	csum->tcf_action = cfg->action;
+	csum->update_flags = cfg->update_flags;
+	spin_unlock_bh(&csum->tcf_lock);
+
+	if (!created)
+		return 0;
+
+	/* New entries are inserted only after their parameters are set. */
+	tcf_hash_insert(common, &csum_hash_info);
+	return ACT_P_CREATED;
+}
+
+/*
+ * Cleanup callback: release the hash entry behind action @a and return
+ * whatever tcf_hash_release() reports.
+ */
+static int tcf_csum_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_csum *p = a->priv;
+ return tcf_hash_release(&p->common, bind, &csum_hash_info);
+}
+
+/**
+ * tcf_csum_skb_nextlayer - Get next layer pointer
+ * @skb: sk_buff to use
+ * @ihl: previous summed headers length
+ * @ipl: complete packet length
+ * @jhl: next header length
+ *
+ * Make sure the whole packet can be pulled, that it is long enough to hold
+ * the next header, and that a cloned skb is writable over that header
+ * (expanding the head if it is not).
+ * Return the next layer pointer on success, NULL otherwise.
+ */
+static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
+				    unsigned int ihl, unsigned int ipl,
+				    unsigned int jhl)
+{
+	int ntkoff = skb_network_offset(skb);
+	int hl = ihl + jhl;
+
+	if (!pskb_may_pull(skb, ipl + ntkoff))
+		return NULL;
+
+	if (ipl < hl)
+		return NULL;
+
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, hl + ntkoff) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		return NULL;
+
+	return (void *)(skb_network_header(skb) + ihl);
+}
+
+/*
+ * Recompute the ICMP checksum over the ipl - ihl bytes that follow the
+ * IPv4 header.  Returns 1 on success, 0 when the ICMP header cannot be
+ * accessed.
+ */
+static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
+			      unsigned int ihl, unsigned int ipl)
+{
+	struct icmphdr *hdr = tcf_csum_skb_nextlayer(skb, ihl, ipl,
+						     sizeof(*hdr));
+
+	if (!hdr)
+		return 0;
+
+	/* The checksum field must be zero while the sum is computed. */
+	hdr->checksum = 0;
+	skb->csum = csum_partial(hdr, ipl - ihl, 0);
+	hdr->checksum = csum_fold(skb->csum);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
+/*
+ * Recompute the IGMP checksum over the ipl - ihl bytes that follow the
+ * IPv4 header.  Returns 1 on success, 0 when the IGMP header cannot be
+ * accessed.
+ */
+static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
+			      unsigned int ihl, unsigned int ipl)
+{
+	struct igmphdr *hdr = tcf_csum_skb_nextlayer(skb, ihl, ipl,
+						     sizeof(*hdr));
+
+	if (!hdr)
+		return 0;
+
+	/* The checksum field must be zero while the sum is computed. */
+	hdr->csum = 0;
+	skb->csum = csum_partial(hdr, ipl - ihl, 0);
+	hdr->csum = csum_fold(skb->csum);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
+/*
+ * Recompute the ICMPv6 checksum, including the IPv6 pseudo-header.
+ *
+ * @ip6h: IPv6 header pointer supplied by the caller; it is reloaded below
+ * @ihl:  length of the headers preceding the ICMPv6 header
+ * @ipl:  complete packet length
+ *
+ * Returns 1 on success, 0 when the ICMPv6 header cannot be accessed.
+ */
+static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+			      unsigned int ihl, unsigned int ipl)
+{
+	struct icmp6hdr *icmp6h;
+
+	icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
+	if (icmp6h == NULL)
+		return 0;
+
+	/*
+	 * tcf_csum_skb_nextlayer() may pull data or expand the head, which
+	 * can move the headers; the caller's pointer must not be reused.
+	 */
+	ip6h = ipv6_hdr(skb);
+
+	icmp6h->icmp6_cksum = 0;
+	skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
+	icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					      ipl - ihl, IPPROTO_ICMPV6,
+					      skb->csum);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
+/*
+ * Recompute the TCP checksum of an IPv4 packet.
+ *
+ * @iph: IPv4 header pointer supplied by the caller; it is reloaded below
+ * @ihl: IPv4 header length in bytes
+ * @ipl: complete packet length
+ *
+ * Returns 1 on success, 0 when the TCP header cannot be accessed.
+ */
+static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
+			     unsigned int ihl, unsigned int ipl)
+{
+	struct tcphdr *tcph;
+
+	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
+	if (tcph == NULL)
+		return 0;
+
+	/*
+	 * tcf_csum_skb_nextlayer() may pull data or expand the head, which
+	 * can move the headers; the caller's pointer must not be reused.
+	 */
+	iph = ip_hdr(skb);
+
+	tcph->check = 0;
+	skb->csum = csum_partial(tcph, ipl - ihl, 0);
+	tcph->check = tcp_v4_check(ipl - ihl,
+				   iph->saddr, iph->daddr, skb->csum);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
+/*
+ * Recompute the TCP checksum of an IPv6 packet.
+ *
+ * @ip6h: IPv6 header pointer supplied by the caller; it is reloaded below
+ * @ihl:  length of the headers preceding the TCP header
+ * @ipl:  complete packet length
+ *
+ * Returns 1 on success, 0 when the TCP header cannot be accessed.
+ */
+static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int ihl, unsigned int ipl)
+{
+	struct tcphdr *tcph;
+
+	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
+	if (tcph == NULL)
+		return 0;
+
+	/*
+	 * tcf_csum_skb_nextlayer() may pull data or expand the head, which
+	 * can move the headers; the caller's pointer must not be reused.
+	 */
+	ip6h = ipv6_hdr(skb);
+
+	tcph->check = 0;
+	skb->csum = csum_partial(tcph, ipl - ihl, 0);
+	tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				      ipl - ihl, IPPROTO_TCP,
+				      skb->csum);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
+/*
+ * Recompute the UDP or UDP-Lite checksum of an IPv4 packet.
+ *
+ * @iph:     IPv4 header pointer supplied by the caller; it is reloaded below
+ * @ihl:     IPv4 header length in bytes
+ * @ipl:     complete packet length
+ * @udplite: non-zero to apply the UDP-Lite rules
+ *
+ * Returns 0 when the UDP header cannot be accessed, 1 otherwise (also for
+ * packets whose length field is inconsistent, which are left alone after
+ * the point reached).
+ */
+static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
+			     unsigned int ihl, unsigned int ipl, int udplite)
+{
+	struct udphdr *udph;
+	u16 ul;
+
+	/* Support both UDP and UDPLITE checksum algorithms,
+	 * Don't use udph->len to get the real length without any protocol check,
+	 * UDPLITE uses udph->len for another thing,
+	 * Use iph->tot_len, or just ipl.
+	 */
+
+	udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
+	if (udph == NULL)
+		return 0;
+
+	/*
+	 * tcf_csum_skb_nextlayer() may pull data or expand the head, which
+	 * can move the headers; the caller's pointer must not be reused.
+	 */
+	iph = ip_hdr(skb);
+
+	ul = ntohs(udph->len);
+
+	/* Plain UDP with a zero checksum field is not given a checksum. */
+	if (udplite || udph->check) {
+
+		udph->check = 0;
+
+		if (udplite) {
+			if (ul == 0)
+				skb->csum = csum_partial(udph, ipl - ihl, 0);
+
+			else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
+				skb->csum = csum_partial(udph, ul, 0);
+
+			else
+				goto ignore_obscure_skb;
+		} else {
+			if (ul != ipl - ihl)
+				goto ignore_obscure_skb;
+
+			skb->csum = csum_partial(udph, ul, 0);
+		}
+
+		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+						ul, iph->protocol,
+						skb->csum);
+
+		if (!udph->check)
+			udph->check = CSUM_MANGLED_0;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+ignore_obscure_skb:
+	return 1;
+}
+
+/*
+ * Recompute the UDP or UDP-Lite checksum of an IPv6 packet.
+ *
+ * @ip6h:    IPv6 header pointer supplied by the caller; it is reloaded below
+ * @ihl:     length of the headers preceding the UDP header
+ * @ipl:     complete packet length
+ * @udplite: non-zero to apply the UDP-Lite rules
+ *
+ * Returns 0 when the UDP header cannot be accessed, 1 otherwise (also for
+ * packets whose length field is inconsistent, which are left alone after
+ * the point reached).
+ */
+static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int ihl, unsigned int ipl, int udplite)
+{
+	struct udphdr *udph;
+	u16 ul;
+
+	/* Support both UDP and UDPLITE checksum algorithms,
+	 * Don't use udph->len to get the real length without any protocol check,
+	 * UDPLITE uses udph->len for another thing,
+	 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
+	 */
+
+	udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
+	if (udph == NULL)
+		return 0;
+
+	/*
+	 * tcf_csum_skb_nextlayer() may pull data or expand the head, which
+	 * can move the headers; the caller's pointer must not be reused.
+	 */
+	ip6h = ipv6_hdr(skb);
+
+	ul = ntohs(udph->len);
+
+	udph->check = 0;
+
+	if (udplite) {
+		if (ul == 0)
+			skb->csum = csum_partial(udph, ipl - ihl, 0);
+
+		else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
+			skb->csum = csum_partial(udph, ul, 0);
+
+		else
+			goto ignore_obscure_skb;
+	} else {
+		if (ul != ipl - ihl)
+			goto ignore_obscure_skb;
+
+		skb->csum = csum_partial(udph, ul, 0);
+	}
+
+	udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
+				      udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
+				      skb->csum);
+
+	if (!udph->check)
+		udph->check = CSUM_MANGLED_0;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+ignore_obscure_skb:
+	return 1;
+}
+
+/*
+ * Update the checksums of an IPv4 packet as selected by @update_flags.
+ *
+ * The transport checksum is handled first (skipped for fragments with a
+ * non-zero offset), then the IPv4 header checksum when
+ * TCA_CSUM_UPDATE_FLAG_IPV4HDR is set.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
+{
+	struct iphdr *iph;
+	int ntkoff;
+
+	ntkoff = skb_network_offset(skb);
+
+	if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
+		goto fail;
+
+	iph = ip_hdr(skb);
+
+	/* Protocol 0 is used as a "no transport checksum" selector here. */
+	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
+	case IPPROTO_ICMP:
+		if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
+			if (!tcf_csum_ipv4_icmp(skb,
+						iph->ihl * 4, ntohs(iph->tot_len)))
+				goto fail;
+		break;
+	case IPPROTO_IGMP:
+		if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
+			if (!tcf_csum_ipv4_igmp(skb,
+						iph->ihl * 4, ntohs(iph->tot_len)))
+				goto fail;
+		break;
+	case IPPROTO_TCP:
+		if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
+			if (!tcf_csum_ipv4_tcp(skb, iph,
+					       iph->ihl * 4, ntohs(iph->tot_len)))
+				goto fail;
+		break;
+	case IPPROTO_UDP:
+		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
+			if (!tcf_csum_ipv4_udp(skb, iph,
+					       iph->ihl * 4, ntohs(iph->tot_len), 0))
+				goto fail;
+		break;
+	case IPPROTO_UDPLITE:
+		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
+			if (!tcf_csum_ipv4_udp(skb, iph,
+					       iph->ihl * 4, ntohs(iph->tot_len), 1))
+				goto fail;
+		break;
+	}
+
+	if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
+		if (skb_cloned(skb) &&
+		    !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
+		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			goto fail;
+
+		/*
+		 * The helpers above and pskb_expand_head() may have moved
+		 * the headers, so fetch the IPv4 header again instead of
+		 * using the pointer taken before them.
+		 */
+		ip_send_check(ip_hdr(skb));
+	}
+
+	return 1;
+
+fail:
+	return 0;
+}
+
+/*
+ * Walk the options of a hop-by-hop header looking for a jumbo payload
+ * option.
+ *
+ * @ip6xh: start of the hop-by-hop extension header
+ * @ixhl:  total length of that header in bytes
+ * @pl:    updated with the jumbo payload length when the option is found
+ *
+ * Returns 0 when a jumbo option has the wrong length or alignment,
+ * 1 otherwise (whether or not a jumbo option was found).
+ */
+static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
+ unsigned int ixhl, unsigned int *pl)
+{
+ int off, len, optlen;
+ unsigned char *xh = (void *)ip6xh;
+
+ /* Options start right after the fixed part of the extension header. */
+ off = sizeof(*ip6xh);
+ len = ixhl - off;
+
+ while (len > 1) {
+ switch (xh[off])
+ {
+ case IPV6_TLV_PAD0:
+ /* Pad0 is a single byte without a length field. */
+ optlen = 1;
+ break;
+ case IPV6_TLV_JUMBO:
+ optlen = xh[off + 1] + 2;
+ if (optlen != 6 || len < 6 || (off & 3) != 2)
+ /* wrong jumbo option length/alignment */
+ return 0;
+ /* The 32-bit payload length follows the type and length bytes. */
+ *pl = ntohl(*(__be32 *)(xh + off + 2));
+ goto done;
+ default:
+ /* Any other option: type byte, length byte, then the data. */
+ optlen = xh[off + 1] + 2;
+ if (optlen > len)
+ /* ignore obscure options */
+ goto done;
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+
+done:
+ return 1;
+}
+
+/*
+ * Update the transport checksum of an IPv6 packet as selected by
+ * @update_flags.
+ *
+ * Routing, hop-by-hop and destination extension headers are skipped; a
+ * jumbo option in a hop-by-hop header overrides the payload length.
+ * Fragments and unknown next headers are left untouched.
+ *
+ * Returns 1 on success (including ignored packets) and 0 on failure.
+ */
+static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
+{
+	struct ipv6hdr *ip6h;
+	struct ipv6_opt_hdr *ip6xh;
+	unsigned int hl, ixhl;
+	unsigned int pl;
+	int ntkoff;
+	u8 nexthdr;
+
+	ntkoff = skb_network_offset(skb);
+
+	hl = sizeof(*ip6h);
+
+	if (!pskb_may_pull(skb, hl + ntkoff))
+		goto fail;
+
+	ip6h = ipv6_hdr(skb);
+
+	pl = ntohs(ip6h->payload_len);
+	nexthdr = ip6h->nexthdr;
+
+	do {
+		switch (nexthdr) {
+		case NEXTHDR_FRAGMENT:
+			goto ignore_skb;
+		case NEXTHDR_ROUTING:
+		case NEXTHDR_HOP:
+		case NEXTHDR_DEST:
+			if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
+				goto fail;
+			ip6xh = (void *)(skb_network_header(skb) + hl);
+			ixhl = ipv6_optlen(ip6xh);
+			if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
+				goto fail;
+			/* The pull above may have moved the data: look it up again. */
+			ip6xh = (void *)(skb_network_header(skb) + hl);
+			if ((nexthdr == NEXTHDR_HOP) &&
+			    !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
+				goto fail;
+			nexthdr = ip6xh->nexthdr;
+			hl += ixhl;
+			break;
+		/*
+		 * The transport helpers get a freshly loaded IPv6 header
+		 * pointer, because the pulls done in this loop may have
+		 * invalidated the one taken at function entry.
+		 */
+		case IPPROTO_ICMPV6:
+			if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
+				if (!tcf_csum_ipv6_icmp(skb, ipv6_hdr(skb),
+							hl, pl + sizeof(*ip6h)))
+					goto fail;
+			goto done;
+		case IPPROTO_TCP:
+			if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
+				if (!tcf_csum_ipv6_tcp(skb, ipv6_hdr(skb),
+						       hl, pl + sizeof(*ip6h)))
+					goto fail;
+			goto done;
+		case IPPROTO_UDP:
+			if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
+				if (!tcf_csum_ipv6_udp(skb, ipv6_hdr(skb),
+						       hl, pl + sizeof(*ip6h), 0))
+					goto fail;
+			goto done;
+		case IPPROTO_UDPLITE:
+			if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
+				if (!tcf_csum_ipv6_udp(skb, ipv6_hdr(skb),
+						       hl, pl + sizeof(*ip6h), 1))
+					goto fail;
+			goto done;
+		default:
+			goto ignore_skb;
+		}
+	} while (pskb_may_pull(skb, hl + 1 + ntkoff));
+
+done:
+ignore_skb:
+	return 1;
+
+fail:
+	return 0;
+}
+
+/*
+ * Action entry point: account the packet, then refresh its checksums
+ * according to the configured update flags.
+ *
+ * Returns the configured action, or TC_ACT_SHOT (after counting a drop)
+ * when the configured action is TC_ACT_SHOT or a checksum update fails.
+ */
+static int tcf_csum(struct sk_buff *skb,
+		    struct tc_action *a, struct tcf_result *res)
+{
+	struct tcf_csum *p = a->priv;
+	u32 update_flags;
+	int action;
+	int ok = 1;
+
+	/* Snapshot the configuration while updating the statistics. */
+	spin_lock(&p->tcf_lock);
+	p->tcf_tm.lastuse = jiffies;
+	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
+	p->tcf_bstats.packets++;
+	action = p->tcf_action;
+	update_flags = p->update_flags;
+	spin_unlock(&p->tcf_lock);
+
+	if (unlikely(action == TC_ACT_SHOT))
+		ok = 0;
+	else if (skb->protocol == cpu_to_be16(ETH_P_IP))
+		ok = tcf_csum_ipv4(skb, update_flags);
+	else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+		ok = tcf_csum_ipv6(skb, update_flags);
+
+	if (ok)
+		return action;
+
+	spin_lock(&p->tcf_lock);
+	p->tcf_qstats.drops++;
+	spin_unlock(&p->tcf_lock);
+	return TC_ACT_SHOT;
+}
+
+/*
+ * Dump callback: append the action's parameters (TCA_CSUM_PARMS) and
+ * timing information (TCA_CSUM_TM) to @skb.
+ *
+ * Returns skb->len on success.  On failure the message is trimmed back to
+ * where it stood on entry and -1 is returned.
+ */
+static int tcf_csum_dump(struct sk_buff *skb,
+ struct tc_action *a, int bind, int ref)
+{
+ /* Remember the current tail so a partial dump can be undone. */
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_csum *p = a->priv;
+ struct tc_csum opt = {
+ .update_flags = p->update_flags,
+
+ .index = p->tcf_index,
+ .action = p->tcf_action,
+ .refcnt = p->tcf_refcnt - ref,
+ .bindcnt = p->tcf_bindcnt - bind,
+ };
+ struct tcf_t t;
+
+ /* NLA_PUT is expected to jump to nla_put_failure when it cannot add the attribute. */
+ NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+ NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+/*
+ * Operations table for the "csum" action; registered by csum_init_module()
+ * and unregistered by csum_cleanup_module().
+ */
+static struct tc_action_ops act_csum_ops = {
+ .kind = "csum",
+ .hinfo = &csum_hash_info,
+ .type = TCA_ACT_CSUM,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_csum,
+ .dump = tcf_csum_dump,
+ .cleanup = tcf_csum_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_csum_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_DESCRIPTION("Checksum updating actions");
+MODULE_LICENSE("GPL");
+
+/* Module init: register the csum action and return the registration result. */
+static int __init csum_init_module(void)
+{
+ return tcf_register_action(&act_csum_ops);
+}
+
+/* Module exit: unregister the csum action. */
+static void __exit csum_cleanup_module(void)
+{
+ tcf_unregister_action(&act_csum_ops);
+}
+
+module_init(csum_init_module);
+module_exit(csum_cleanup_module);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..cd709f1294df 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
}
}
-static int has_ports(u8 protocol)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- return 1;
- default:
- return 0;
- }
-}
-
static u32 flow_get_proto_src(struct sk_buff *skb)
{
switch (skb->protocol) {
case htons(ETH_P_IP): {
struct iphdr *iph;
+ int poff;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ip_hdr(skb);
- if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
- has_ports(iph->protocol) &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 2))
- return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
+ if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ break;
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
+ iph = ip_hdr(skb);
+ return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
+ poff));
+ }
break;
}
case htons(ETH_P_IPV6): {
struct ipv6hdr *iph;
+ int poff;
- if (!pskb_network_may_pull(skb, sizeof(*iph) + 2))
+ if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ipv6_hdr(skb);
- if (has_ports(iph->nexthdr))
- return ntohs(*(__be16 *)&iph[1]);
+ poff = proto_ports_offset(iph->nexthdr);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
+ iph = ipv6_hdr(skb);
+ return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
+ poff));
+ }
break;
}
}
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
switch (skb->protocol) {
case htons(ETH_P_IP): {
struct iphdr *iph;
+ int poff;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ip_hdr(skb);
- if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
- has_ports(iph->protocol) &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 4))
- return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
+ if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ break;
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
+ iph = ip_hdr(skb);
+ return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
+ 2 + poff));
+ }
break;
}
case htons(ETH_P_IPV6): {
struct ipv6hdr *iph;
+ int poff;
- if (!pskb_network_may_pull(skb, sizeof(*iph) + 4))
+ if (!pskb_network_may_pull(skb, sizeof(*iph)))
break;
iph = ipv6_hdr(skb);
- if (has_ports(iph->nexthdr))
- return ntohs(*(__be16 *)((void *)&iph[1] + 2));
+ poff = proto_ports_offset(iph->nexthdr);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
+ iph = ipv6_hdr(skb);
+ return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
+ poff + 2));
+ }
break;
}
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..6fb3d41c0e41 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -360,7 +360,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
}
- if (!s || tsize != s->tsize || (!tab && tsize > 0))
+ if (tsize != s->tsize || (!tab && tsize > 0))
return ERR_PTR(-EINVAL);
spin_lock(&qdisc_stab_lock);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..3cf478d012dd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
case htons(ETH_P_IP):
{
const struct iphdr *iph;
+ int poff;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
goto err;
iph = ip_hdr(skb);
h = (__force u32)iph->daddr;
h2 = (__force u32)iph->saddr ^ iph->protocol;
- if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
- (iph->protocol == IPPROTO_TCP ||
- iph->protocol == IPPROTO_UDP ||
- iph->protocol == IPPROTO_UDPLITE ||
- iph->protocol == IPPROTO_SCTP ||
- iph->protocol == IPPROTO_DCCP ||
- iph->protocol == IPPROTO_ESP) &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 4))
- h2 ^= *(((u32*)iph) + iph->ihl);
+ if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+ break;
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
+ iph = ip_hdr(skb);
+ h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+ }
break;
}
case htons(ETH_P_IPV6):
{
struct ipv6hdr *iph;
+ int poff;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
goto err;
iph = ipv6_hdr(skb);
h = (__force u32)iph->daddr.s6_addr32[3];
h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
- if ((iph->nexthdr == IPPROTO_TCP ||
- iph->nexthdr == IPPROTO_UDP ||
- iph->nexthdr == IPPROTO_UDPLITE ||
- iph->nexthdr == IPPROTO_SCTP ||
- iph->nexthdr == IPPROTO_DCCP ||
- iph->nexthdr == IPPROTO_ESP) &&
- pskb_network_may_pull(skb, sizeof(*iph) + 4))
- h2 ^= *(u32*)&iph[1];
+ poff = proto_ports_offset(iph->nexthdr);
+ if (poff >= 0 &&
+ pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
+ iph = ipv6_hdr(skb);
+ h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+ }
break;
}
default:
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc7..7848d12f5e4d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -535,14 +535,13 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
- union skb_shared_tx *shtx)
+int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
{
- shtx->flags = 0;
+ *tx_flags = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
- shtx->hardware = 1;
+ *tx_flags |= SKBTX_HW_TSTAMP;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
- shtx->software = 1;
+ *tx_flags |= SKBTX_SW_TSTAMP;
return 0;
}
EXPORT_SYMBOL(sock_tx_timestamp);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8dc47f1d0001..36cb66022a27 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -19,6 +19,15 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
+struct rpc_cred_cache {
+ struct hlist_head *hashtable;
+ unsigned int hashbits;
+ spinlock_t lock;
+};
+
+static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
+
static DEFINE_SPINLOCK(rpc_authflavor_lock);
static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
&authnull_ops, /* AUTH_NULL */
@@ -29,6 +38,47 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
static LIST_HEAD(cred_unused);
static unsigned long number_cred_unused;
+#define MAX_HASHTABLE_BITS (10)
+static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
+{
+ unsigned long num;
+ unsigned int nbits;
+ int ret;
+
+ if (!val)
+ goto out_inval;
+ ret = strict_strtoul(val, 0, &num);
+ if (ret == -EINVAL)
+ goto out_inval;
+ nbits = fls(num);
+ if (num > (1U << nbits))
+ nbits++;
+ if (nbits > MAX_HASHTABLE_BITS || nbits < 2)
+ goto out_inval;
+ *(unsigned int *)kp->arg = nbits;
+ return 0;
+out_inval:
+ return -EINVAL;
+}
+
+static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
+{
+ unsigned int nbits;
+
+ nbits = *(unsigned int *)kp->arg;
+ return sprintf(buffer, "%u", 1U << nbits);
+}
+
+#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
+
+static struct kernel_param_ops param_ops_hashtbl_sz = {
+ .set = param_set_hashtbl_sz,
+ .get = param_get_hashtbl_sz,
+};
+
+module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
+MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
+
static u32
pseudoflavor_to_flavor(u32 flavor) {
if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -145,16 +195,23 @@ int
rpcauth_init_credcache(struct rpc_auth *auth)
{
struct rpc_cred_cache *new;
- int i;
+ unsigned int hashsize;
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
- return -ENOMEM;
- for (i = 0; i < RPC_CREDCACHE_NR; i++)
- INIT_HLIST_HEAD(&new->hashtable[i]);
+ goto out_nocache;
+ new->hashbits = auth_hashbits;
+ hashsize = 1U << new->hashbits;
+ new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL);
+ if (!new->hashtable)
+ goto out_nohashtbl;
spin_lock_init(&new->lock);
auth->au_credcache = new;
return 0;
+out_nohashtbl:
+ kfree(new);
+out_nocache:
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
@@ -183,11 +240,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache)
LIST_HEAD(free);
struct hlist_head *head;
struct rpc_cred *cred;
+ unsigned int hashsize = 1U << cache->hashbits;
int i;
spin_lock(&rpc_credcache_lock);
spin_lock(&cache->lock);
- for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+ for (i = 0; i < hashsize; i++) {
head = &cache->hashtable[i];
while (!hlist_empty(head)) {
cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
@@ -216,6 +274,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
if (cache) {
auth->au_credcache = NULL;
rpcauth_clear_credcache(cache);
+ kfree(cache->hashtable);
kfree(cache);
}
}
@@ -297,7 +356,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
*entry, *new;
unsigned int nr;
- nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS);
+ nr = hash_long(acred->uid, cache->hashbits);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
@@ -390,16 +449,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
-void
+struct rpc_cred *
rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
{
- task->tk_msg.rpc_cred = get_rpccred(cred);
dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
cred->cr_auth->au_ops->au_name, cred);
+ return get_rpccred(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
-static void
+static struct rpc_cred *
rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
@@ -407,45 +466,43 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
.uid = 0,
.gid = 0,
};
- struct rpc_cred *ret;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
- ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
- if (!IS_ERR(ret))
- task->tk_msg.rpc_cred = ret;
- else
- task->tk_status = PTR_ERR(ret);
+ return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
-static void
+static struct rpc_cred *
rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
- struct rpc_cred *ret;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, auth->au_ops->au_name);
- ret = rpcauth_lookupcred(auth, lookupflags);
- if (!IS_ERR(ret))
- task->tk_msg.rpc_cred = ret;
- else
- task->tk_status = PTR_ERR(ret);
+ return rpcauth_lookupcred(auth, lookupflags);
}
-void
+static int
rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_cred *new;
int lookupflags = 0;
if (flags & RPC_TASK_ASYNC)
lookupflags |= RPCAUTH_LOOKUP_NEW;
if (cred != NULL)
- cred->cr_ops->crbind(task, cred, lookupflags);
+ new = cred->cr_ops->crbind(task, cred, lookupflags);
else if (flags & RPC_TASK_ROOTCREDS)
- rpcauth_bind_root_cred(task, lookupflags);
+ new = rpcauth_bind_root_cred(task, lookupflags);
else
- rpcauth_bind_new_cred(task, lookupflags);
+ new = rpcauth_bind_new_cred(task, lookupflags);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ if (req->rq_cred != NULL)
+ put_rpccred(req->rq_cred);
+ req->rq_cred = new;
+ return 0;
}
void
@@ -484,22 +541,10 @@ out_nodestroy:
}
EXPORT_SYMBOL_GPL(put_rpccred);
-void
-rpcauth_unbindcred(struct rpc_task *task)
-{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
-
- dprintk("RPC: %5u releasing %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
- put_rpccred(cred);
- task->tk_msg.rpc_cred = NULL;
-}
-
__be32 *
rpcauth_marshcred(struct rpc_task *task, __be32 *p)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u marshaling %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -510,7 +555,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p)
__be32 *
rpcauth_checkverf(struct rpc_task *task, __be32 *p)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u validating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -522,7 +567,7 @@ int
rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
__be32 *data, void *obj)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -536,7 +581,7 @@ int
rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
__be32 *data, void *obj)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -550,13 +595,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int
rpcauth_refreshcred(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
int err;
+ cred = task->tk_rqstp->rq_cred;
+ if (cred == NULL) {
+ err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags);
+ if (err < 0)
+ goto out;
+ cred = task->tk_rqstp->rq_cred;
+ };
dprintk("RPC: %5u refreshing %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
err = cred->cr_ops->crrefresh(task);
+out:
if (err < 0)
task->tk_status = err;
return err;
@@ -565,7 +618,7 @@ rpcauth_refreshcred(struct rpc_task *task)
void
rpcauth_invalcred(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
dprintk("RPC: %5u invalidating %s cred %p\n",
task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -576,7 +629,7 @@ rpcauth_invalcred(struct rpc_task *task)
int
rpcauth_uptodatecred(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
return cred == NULL ||
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
@@ -587,14 +640,27 @@ static struct shrinker rpc_cred_shrinker = {
.seeks = DEFAULT_SEEKS,
};
-void __init rpcauth_init_module(void)
+int __init rpcauth_init_module(void)
{
- rpc_init_authunix();
- rpc_init_generic_auth();
+ int err;
+
+ err = rpc_init_authunix();
+ if (err < 0)
+ goto out1;
+ err = rpc_init_generic_auth();
+ if (err < 0)
+ goto out2;
register_shrinker(&rpc_cred_shrinker);
+ return 0;
+out2:
+ rpc_destroy_authunix();
+out1:
+ return err;
}
void __exit rpcauth_remove_module(void)
{
+ rpc_destroy_authunix();
+ rpc_destroy_generic_auth();
unregister_shrinker(&rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 8f623b0f03dd..43162bb3b78f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -27,7 +27,6 @@ struct generic_cred {
};
static struct rpc_auth generic_auth;
-static struct rpc_cred_cache generic_cred_cache;
static const struct rpc_credops generic_credops;
/*
@@ -55,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
}
EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
-static void
-generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
+static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
+ struct rpc_cred *cred, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
- struct rpc_cred *ret;
- ret = auth->au_ops->lookup_cred(auth, acred, lookupflags);
- if (!IS_ERR(ret))
- task->tk_msg.rpc_cred = ret;
- else
- task->tk_status = PTR_ERR(ret);
+ return auth->au_ops->lookup_cred(auth, acred, lookupflags);
}
/*
@@ -159,20 +153,16 @@ out_nomatch:
return 0;
}
-void __init rpc_init_generic_auth(void)
+int __init rpc_init_generic_auth(void)
{
- spin_lock_init(&generic_cred_cache.lock);
+ return rpcauth_init_credcache(&generic_auth);
}
void __exit rpc_destroy_generic_auth(void)
{
- rpcauth_clear_credcache(&generic_cred_cache);
+ rpcauth_destroy_credcache(&generic_auth);
}
-static struct rpc_cred_cache generic_cred_cache = {
- {{ NULL, },},
-};
-
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
@@ -183,7 +173,6 @@ static const struct rpc_authops generic_auth_ops = {
static struct rpc_auth generic_auth = {
.au_ops = &generic_auth_ops,
.au_count = ATOMIC_INIT(0),
- .au_credcache = &generic_cred_cache,
};
static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 8da2a0e68574..dcfc66bab2bb 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -373,7 +373,7 @@ gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss
static void
gss_upcall_callback(struct rpc_task *task)
{
- struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+ struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
struct inode *inode = &gss_msg->inode->vfs_inode;
@@ -502,7 +502,7 @@ static void warn_gssd(void)
static inline int
gss_refresh_upcall(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_auth *gss_auth = container_of(cred->cr_auth,
struct gss_auth, rpc_auth);
struct gss_cred *gss_cred = container_of(cred,
@@ -928,6 +928,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
dprintk("RPC: gss_free_ctx\n");
+ gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
kfree(ctx);
}
@@ -942,13 +943,7 @@ gss_free_ctx_callback(struct rcu_head *head)
static void
gss_free_ctx(struct gss_cl_ctx *ctx)
{
- struct gss_ctx *gc_gss_ctx;
-
- gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx);
- rcu_assign_pointer(ctx->gc_gss_ctx, NULL);
call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
- if (gc_gss_ctx)
- gss_delete_sec_context(&gc_gss_ctx);
}
static void
@@ -1064,12 +1059,12 @@ out:
static __be32 *
gss_marshal(struct rpc_task *task, __be32 *p)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_cred *cred = req->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *cred_len;
- struct rpc_rqst *req = task->tk_rqstp;
u32 maj_stat = 0;
struct xdr_netobj mic;
struct kvec iov;
@@ -1119,7 +1114,7 @@ out_put_ctx:
static int gss_renew_cred(struct rpc_task *task)
{
- struct rpc_cred *oldcred = task->tk_msg.rpc_cred;
+ struct rpc_cred *oldcred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(oldcred,
struct gss_cred,
gc_base);
@@ -1133,7 +1128,7 @@ static int gss_renew_cred(struct rpc_task *task)
new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
if (IS_ERR(new))
return PTR_ERR(new);
- task->tk_msg.rpc_cred = new;
+ task->tk_rqstp->rq_cred = new;
put_rpccred(oldcred);
return 0;
}
@@ -1161,7 +1156,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred)
static int
gss_refresh(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
int ret = 0;
if (gss_cred_is_negative_entry(cred))
@@ -1172,7 +1167,7 @@ gss_refresh(struct rpc_task *task)
ret = gss_renew_cred(task);
if (ret < 0)
goto out;
- cred = task->tk_msg.rpc_cred;
+ cred = task->tk_rqstp->rq_cred;
}
if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
@@ -1191,7 +1186,7 @@ gss_refresh_null(struct rpc_task *task)
static __be32 *
gss_validate(struct rpc_task *task, __be32 *p)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 seq;
struct kvec iov;
@@ -1400,7 +1395,7 @@ static int
gss_wrap_req(struct rpc_task *task,
kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
@@ -1503,7 +1498,7 @@ static int
gss_unwrap_resp(struct rpc_task *task,
kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 1db618f56ecb..a5c36c01707b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p)
static int
nul_refresh(struct rpc_task *task)
{
- set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index aac2f8b4ee21..4cb70dc6e7ad 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -29,7 +29,6 @@ struct unx_cred {
#endif
static struct rpc_auth unix_auth;
-static struct rpc_cred_cache unix_cred_cache;
static const struct rpc_credops unix_credops;
static struct rpc_auth *
@@ -141,7 +140,7 @@ static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
- struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+ struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
__be32 *base, *hold;
int i;
@@ -174,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
static int
unx_refresh(struct rpc_task *task)
{
- set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
return 0;
}
@@ -197,15 +196,20 @@ unx_validate(struct rpc_task *task, __be32 *p)
printk("RPC: giant verf size: %u\n", size);
return NULL;
}
- task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2;
+ task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
return p;
}
-void __init rpc_init_authunix(void)
+int __init rpc_init_authunix(void)
{
- spin_lock_init(&unix_cred_cache.lock);
+ return rpcauth_init_credcache(&unix_auth);
+}
+
+void rpc_destroy_authunix(void)
+{
+ rpcauth_destroy_credcache(&unix_auth);
}
const struct rpc_authops authunix_ops = {
@@ -219,17 +223,12 @@ const struct rpc_authops authunix_ops = {
};
static
-struct rpc_cred_cache unix_cred_cache = {
-};
-
-static
struct rpc_auth unix_auth = {
.au_cslack = UNX_WRITESLACK,
.au_rslack = 2, /* assume AUTH_NULL verf */
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = ATOMIC_INIT(0),
- .au_credcache = &unix_cred_cache,
};
static
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 58de76c8540c..2b06410e584e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -34,7 +34,6 @@
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-#include <linux/smp_lock.h>
#define RPCDBG_FACILITY RPCDBG_CACHE
@@ -320,7 +319,7 @@ static struct cache_detail *current_detail;
static int current_index;
static void do_cache_clean(struct work_struct *work);
-static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
+static struct delayed_work cache_cleaner;
static void sunrpc_init_cache_detail(struct cache_detail *cd)
{
@@ -1504,6 +1503,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
}
#endif
+void __init cache_initialize(void)
+{
+ INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
+}
+
int cache_register(struct cache_detail *cd)
{
int ret;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 756fc324db9e..2388d83b68ff 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -414,6 +414,35 @@ out_no_clnt:
EXPORT_SYMBOL_GPL(rpc_clone_client);
/*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+ */
+void rpc_killall_tasks(struct rpc_clnt *clnt)
+{
+ struct rpc_task *rovr;
+
+
+ if (list_empty(&clnt->cl_tasks))
+ return;
+ dprintk("RPC: killing all tasks for client %p\n", clnt);
+ /*
+ * Spin lock all_tasks to prevent changes...
+ */
+ spin_lock(&clnt->cl_lock);
+ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+ if (!RPC_IS_ACTIVATED(rovr))
+ continue;
+ if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+ rovr->tk_flags |= RPC_TASK_KILLED;
+ rpc_exit(rovr, -EIO);
+ rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr);
+ }
+ }
+ spin_unlock(&clnt->cl_lock);
+}
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
+
+/*
* Properly shut down an RPC client, terminating all outstanding
* requests.
*/
@@ -538,6 +567,49 @@ out:
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
+void rpc_task_release_client(struct rpc_task *task)
+{
+ struct rpc_clnt *clnt = task->tk_client;
+
+ if (clnt != NULL) {
+ /* Remove from client task list */
+ spin_lock(&clnt->cl_lock);
+ list_del(&task->tk_task);
+ spin_unlock(&clnt->cl_lock);
+ task->tk_client = NULL;
+
+ rpc_release_client(clnt);
+ }
+}
+
+static
+void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+ if (clnt != NULL) {
+ rpc_task_release_client(task);
+ task->tk_client = clnt;
+ kref_get(&clnt->cl_kref);
+ if (clnt->cl_softrtry)
+ task->tk_flags |= RPC_TASK_SOFT;
+ /* Add to the client's list of all tasks */
+ spin_lock(&clnt->cl_lock);
+ list_add_tail(&task->tk_task, &clnt->cl_tasks);
+ spin_unlock(&clnt->cl_lock);
+ }
+}
+
+static void
+rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
+{
+ if (msg != NULL) {
+ task->tk_msg.rpc_proc = msg->rpc_proc;
+ task->tk_msg.rpc_argp = msg->rpc_argp;
+ task->tk_msg.rpc_resp = msg->rpc_resp;
+ if (msg->rpc_cred != NULL)
+ task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
+ }
+}
+
/*
* Default callback for async RPC calls
*/
@@ -562,6 +634,18 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
if (IS_ERR(task))
goto out;
+ rpc_task_set_client(task, task_setup_data->rpc_client);
+ rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
+
+ if (task->tk_status != 0) {
+ int ret = task->tk_status;
+ rpc_put_task(task);
+ return ERR_PTR(ret);
+ }
+
+ if (task->tk_action == NULL)
+ rpc_call_start(task);
+
atomic_inc(&task->tk_count);
rpc_execute(task);
out:
@@ -756,12 +840,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind);
* Restart an (async) RPC call from the call_prepare state.
* Usually called from within the exit handler.
*/
-void
+int
rpc_restart_call_prepare(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
- return;
+ return 0;
task->tk_action = rpc_prepare_task;
+ return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
@@ -769,13 +854,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
* Restart an (async) RPC call. Usually called from within the
* exit handler.
*/
-void
+int
rpc_restart_call(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
- return;
-
+ return 0;
task->tk_action = call_start;
+ return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
@@ -824,11 +909,6 @@ call_reserve(struct rpc_task *task)
{
dprint_status(task);
- if (!rpcauth_uptodatecred(task)) {
- task->tk_action = call_refresh;
- return;
- }
-
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
@@ -892,7 +972,7 @@ call_reserveresult(struct rpc_task *task)
static void
call_allocate(struct rpc_task *task)
{
- unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack;
+ unsigned int slack = task->tk_client->cl_auth->au_cslack;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -900,7 +980,7 @@ call_allocate(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
- task->tk_action = call_bind;
+ task->tk_action = call_refresh;
if (req->rq_buffer)
return;
@@ -937,6 +1017,47 @@ call_allocate(struct rpc_task *task)
rpc_exit(task, -ERESTARTSYS);
}
+/*
+ * 2a. Bind and/or refresh the credentials
+ */
+static void
+call_refresh(struct rpc_task *task)
+{
+ dprint_status(task);
+
+ task->tk_action = call_refreshresult;
+ task->tk_status = 0;
+ task->tk_client->cl_stats->rpcauthrefresh++;
+ rpcauth_refreshcred(task);
+}
+
+/*
+ * 2b. Process the results of a credential refresh
+ */
+static void
+call_refreshresult(struct rpc_task *task)
+{
+ int status = task->tk_status;
+
+ dprint_status(task);
+
+ task->tk_status = 0;
+ task->tk_action = call_bind;
+ if (status >= 0 && rpcauth_uptodatecred(task))
+ return;
+ switch (status) {
+ case -EACCES:
+ rpc_exit(task, -EACCES);
+ return;
+ case -ENOMEM:
+ rpc_exit(task, -ENOMEM);
+ return;
+ case -ETIMEDOUT:
+ rpc_delay(task, 3*HZ);
+ }
+ task->tk_action = call_refresh;
+}
+
static inline int
rpc_task_need_encode(struct rpc_task *task)
{
@@ -1472,43 +1593,6 @@ out_retry:
}
}
-/*
- * 8. Refresh the credentials if rejected by the server
- */
-static void
-call_refresh(struct rpc_task *task)
-{
- dprint_status(task);
-
- task->tk_action = call_refreshresult;
- task->tk_status = 0;
- task->tk_client->cl_stats->rpcauthrefresh++;
- rpcauth_refreshcred(task);
-}
-
-/*
- * 8a. Process the results of a credential refresh
- */
-static void
-call_refreshresult(struct rpc_task *task)
-{
- int status = task->tk_status;
-
- dprint_status(task);
-
- task->tk_status = 0;
- task->tk_action = call_reserve;
- if (status >= 0 && rpcauth_uptodatecred(task))
- return;
- if (status == -EACCES) {
- rpc_exit(task, -EACCES);
- return;
- }
- task->tk_action = call_refresh;
- if (status != -ETIMEDOUT)
- rpc_delay(task, 3*HZ);
-}
-
static __be32 *
rpc_encode_header(struct rpc_task *task)
{
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4a843b883b89..cace6049e4a5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -246,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
static void rpc_set_active(struct rpc_task *task)
{
- struct rpc_clnt *clnt;
- if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
- return;
rpc_task_set_debuginfo(task);
- /* Add to global list of all tasks */
- clnt = task->tk_client;
- if (clnt != NULL) {
- spin_lock(&clnt->cl_lock);
- list_add_tail(&task->tk_task, &clnt->cl_tasks);
- spin_unlock(&clnt->cl_lock);
- }
+ set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
}
/*
@@ -319,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
- printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
- return;
- }
-
__rpc_add_wait_queue(q, task);
BUG_ON(task->tk_callback != NULL);
@@ -334,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action)
{
- /* Mark the task as being activated if so needed */
- rpc_set_active(task);
+ /* We shouldn't ever put an inactive task to sleep */
+ BUG_ON(!RPC_IS_ACTIVATED(task));
/*
* Protect the queue operations.
@@ -406,14 +392,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
/*
- * Wake up the specified task
- */
-static void rpc_wake_up_task(struct rpc_task *task)
-{
- rpc_wake_up_queued_task(task->tk_waitqueue, task);
-}
-
-/*
* Wake up the next task on a priority queue.
*/
static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
@@ -600,7 +578,15 @@ void rpc_exit_task(struct rpc_task *task)
}
}
}
-EXPORT_SYMBOL_GPL(rpc_exit_task);
+
+void rpc_exit(struct rpc_task *task, int status)
+{
+ task->tk_status = status;
+ task->tk_action = rpc_exit_task;
+ if (RPC_IS_QUEUED(task))
+ rpc_wake_up_queued_task(task->tk_waitqueue, task);
+}
+EXPORT_SYMBOL_GPL(rpc_exit);
void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
{
@@ -690,7 +676,6 @@ static void __rpc_execute(struct rpc_task *task)
dprintk("RPC: %5u got signal\n", task->tk_pid);
task->tk_flags |= RPC_TASK_KILLED;
rpc_exit(task, -ERESTARTSYS);
- rpc_wake_up_task(task);
}
rpc_set_running(task);
dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
@@ -714,8 +699,9 @@ static void __rpc_execute(struct rpc_task *task)
void rpc_execute(struct rpc_task *task)
{
rpc_set_active(task);
- rpc_set_running(task);
- __rpc_execute(task);
+ rpc_make_runnable(task);
+ if (!RPC_IS_ASYNC(task))
+ __rpc_execute(task);
}
static void rpc_async_schedule(struct work_struct *work)
@@ -808,26 +794,9 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
/* Initialize workqueue for async tasks */
task->tk_workqueue = task_setup_data->workqueue;
- task->tk_client = task_setup_data->rpc_client;
- if (task->tk_client != NULL) {
- kref_get(&task->tk_client->cl_kref);
- if (task->tk_client->cl_softrtry)
- task->tk_flags |= RPC_TASK_SOFT;
- }
-
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
- if (task_setup_data->rpc_message != NULL) {
- task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc;
- task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp;
- task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp;
- /* Bind the user cred */
- rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags);
- if (task->tk_action == NULL)
- rpc_call_start(task);
- }
-
/* starting timestamp */
task->tk_start = ktime_get();
@@ -896,11 +865,8 @@ void rpc_put_task(struct rpc_task *task)
if (task->tk_rqstp)
xprt_release(task);
if (task->tk_msg.rpc_cred)
- rpcauth_unbindcred(task);
- if (task->tk_client) {
- rpc_release_client(task->tk_client);
- task->tk_client = NULL;
- }
+ put_rpccred(task->tk_msg.rpc_cred);
+ rpc_task_release_client(task);
if (task->tk_workqueue != NULL) {
INIT_WORK(&task->u.tk_work, rpc_async_release);
queue_work(task->tk_workqueue, &task->u.tk_work);
@@ -913,13 +879,6 @@ static void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %5u release task\n", task->tk_pid);
- if (!list_empty(&task->tk_task)) {
- struct rpc_clnt *clnt = task->tk_client;
- /* Remove from client task list */
- spin_lock(&clnt->cl_lock);
- list_del(&task->tk_task);
- spin_unlock(&clnt->cl_lock);
- }
BUG_ON (RPC_IS_QUEUED(task));
/* Wake up anyone who is waiting for task completion */
@@ -928,35 +887,6 @@ static void rpc_release_task(struct rpc_task *task)
rpc_put_task(task);
}
-/*
- * Kill all tasks for the given client.
- * XXX: kill their descendants as well?
- */
-void rpc_killall_tasks(struct rpc_clnt *clnt)
-{
- struct rpc_task *rovr;
-
-
- if (list_empty(&clnt->cl_tasks))
- return;
- dprintk("RPC: killing all tasks for client %p\n", clnt);
- /*
- * Spin lock all_tasks to prevent changes...
- */
- spin_lock(&clnt->cl_lock);
- list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
- if (! RPC_IS_ACTIVATED(rovr))
- continue;
- if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
- rovr->tk_flags |= RPC_TASK_KILLED;
- rpc_exit(rovr, -EIO);
- rpc_wake_up_task(rovr);
- }
- }
- spin_unlock(&clnt->cl_lock);
-}
-EXPORT_SYMBOL_GPL(rpc_killall_tasks);
-
int rpciod_up(void)
{
return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f438347d817b..c0d085013a2b 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -33,21 +33,27 @@ init_sunrpc(void)
if (err)
goto out;
err = rpc_init_mempool();
- if (err) {
- unregister_rpc_pipefs();
- goto out;
- }
+ if (err)
+ goto out2;
+ err = rpcauth_init_module();
+ if (err)
+ goto out3;
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
#ifdef CONFIG_PROC_FS
rpc_proc_init();
#endif
+ cache_initialize();
cache_register(&ip_map_cache);
cache_register(&unix_gid_cache);
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
- rpcauth_init_module();
+ return 0;
+out3:
+ rpc_destroy_mempool();
+out2:
+ unregister_rpc_pipefs();
out:
return err;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index dcd0132396ba..970fb00f388c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1032,6 +1032,8 @@ void xprt_release(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
+ if (req->rq_cred != NULL)
+ put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
if (req->rq_release_snd_buf)
req->rq_release_snd_buf(req);
@@ -1129,6 +1131,7 @@ static void xprt_destroy(struct kref *kref)
rpc_destroy_wait_queue(&xprt->sending);
rpc_destroy_wait_queue(&xprt->resend);
rpc_destroy_wait_queue(&xprt->backlog);
+ cancel_work_sync(&xprt->task_cleanup);
/*
* Tear down transport state and free the rpc_xprt
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7ca65c7005ea..49a62f0c4b87 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2577,7 +2577,8 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_bc_tcp_transport);
}
-static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
+static int param_set_uint_minmax(const char *val,
+ const struct kernel_param *kp,
unsigned int min, unsigned int max)
{
unsigned long num;
@@ -2592,34 +2593,37 @@ static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
return 0;
}
-static int param_set_portnr(const char *val, struct kernel_param *kp)
+static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
RPC_MIN_RESVPORT,
RPC_MAX_RESVPORT);
}
-static int param_get_portnr(char *buffer, struct kernel_param *kp)
-{
- return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_portnr = {
+ .set = param_set_portnr,
+ .get = param_get_uint,
+};
+
#define param_check_portnr(name, p) \
__param_check(name, p, unsigned int);
module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
-static int param_set_slot_table_size(const char *val, struct kernel_param *kp)
+static int param_set_slot_table_size(const char *val,
+ const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
RPC_MIN_SLOT_TABLE,
RPC_MAX_SLOT_TABLE);
}
-static int param_get_slot_table_size(char *buffer, struct kernel_param *kp)
-{
- return param_get_uint(buffer, kp);
-}
+static struct kernel_param_ops param_ops_slot_table_size = {
+ .set = param_set_slot_table_size,
+ .get = param_get_uint,
+};
+
#define param_check_slot_table_size(name, p) \
__param_check(name, p, unsigned int);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c6689305..b11248c2d788 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -143,6 +143,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
}
+static void bclink_set_last_sent(void)
+{
+ if (bcl->next_out)
+ bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
+ else
+ bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
+}
+
+u32 tipc_bclink_get_last_sent(void)
+{
+ return bcl->fsm_msg_cnt;
+}
+
/**
* bclink_set_gap - set gap according to contents of current deferred pkt queue
*
@@ -237,8 +250,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
/* Try resolving broadcast link congestion, if necessary */
- if (unlikely(bcl->next_out))
+ if (unlikely(bcl->next_out)) {
tipc_link_push_queue(bcl);
+ bclink_set_last_sent();
+ }
if (unlikely(released && !list_empty(&bcl->waiting_ports)))
tipc_link_wakeup_ports(bcl, 0);
spin_unlock_bh(&bc_lock);
@@ -395,7 +410,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
if (unlikely(res == -ELINKCONG))
buf_discard(buf);
else
- bcl->stats.sent_info++;
+ bclink_set_last_sent();
if (bcl->out_queue_size > bcl->stats.max_queue_sz)
bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +544,6 @@ receive:
tipc_node_unlock(node);
}
-u32 tipc_bclink_get_last_sent(void)
-{
- u32 last_sent = mod(bcl->next_out_no - 1);
-
- if (bcl->next_out)
- last_sent = mod(buf_seqno(bcl->next_out) - 1);
- return last_sent;
-}
-
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
{
return (n_ptr->bclink.supported &&
@@ -570,6 +576,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
msg = buf_msg(buf);
msg_set_non_seq(msg, 1);
msg_set_mc_netid(msg, tipc_net_id);
+ bcl->stats.sent_info++;
}
/* Send buffer over bearers until all targets reached */
@@ -609,11 +616,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
bcbearer->remains = bcbearer->remains_new;
}
- /* Unable to reach all targets */
+ /*
+ * Unable to reach all targets (indicate success, since currently
+ * there isn't code in place to properly block & unblock the
+ * pseudo-bearer used by the broadcast link)
+ */
- bcbearer->bearer.publ.blocked = 1;
- bcl->stats.bearer_congs++;
- return 1;
+ return TIPC_OK;
}
/**
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 696468117985..466b861dab91 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -169,6 +169,7 @@ void tipc_core_stop(void)
tipc_nametbl_stop();
tipc_ref_table_stop();
tipc_socket_stop();
+ tipc_log_resize(0);
}
/**
@@ -203,7 +204,9 @@ static int __init tipc_init(void)
{
int res;
- tipc_log_resize(CONFIG_TIPC_LOG);
+ if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
+ warn("Unable to create log buffer\n");
+
info("Activated (version " TIPC_MOD_VER
" compiled " __DATE__ " " __TIME__ ")\n");
@@ -230,7 +233,6 @@ static void __exit tipc_exit(void)
tipc_core_stop_net();
tipc_core_stop();
info("Deactivated\n");
- tipc_log_resize(0);
}
module_init(tipc_init);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b53..f28d1ae93125 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -203,6 +203,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
return;
}
spin_lock_bh(&n_ptr->lock);
+
+ /* Don't talk to neighbor during cleanup after last session */
+
+ if (n_ptr->cleanup_required) {
+ spin_unlock_bh(&n_ptr->lock);
+ return;
+ }
+
link = n_ptr->links[b_ptr->identity];
if (!link) {
dbg("creating link\n");
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529b..a6a3102bb4d6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1802,6 +1802,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
return pskb_may_pull(buf, hdr_size);
}
+/**
+ * tipc_recv_msg - process TIPC messages arriving from off-node
+ * @head: pointer to message buffer chain
+ * @tb_ptr: pointer to bearer message arrived on
+ *
+ * Invoked with no locks held. Bearer pointer must point to a valid bearer
+ * structure (i.e. cannot be NULL), but bearer can be inactive.
+ */
+
void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
{
read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1828,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
head = head->next;
+ /* Ensure bearer is still enabled */
+
+ if (unlikely(!b_ptr->active))
+ goto cont;
+
/* Ensure message is well-formed */
if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1869,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
goto cont;
}
- /* Locate unicast link endpoint that should handle message */
+ /* Locate neighboring node that sent message */
n_ptr = tipc_node_find(msg_prevnode(msg));
if (unlikely(!n_ptr))
goto cont;
tipc_node_lock(n_ptr);
+ /* Don't talk to neighbor during cleanup after last session */
+
+ if (n_ptr->cleanup_required) {
+ tipc_node_unlock(n_ptr);
+ goto cont;
+ }
+
+ /* Locate unicast link endpoint that should handle message */
+
l_ptr = n_ptr->links[b_ptr->identity];
if (unlikely(!l_ptr)) {
tipc_node_unlock(n_ptr);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3f..d504e490fd02 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -877,7 +877,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
u32 index)
{
char portIdStr[27];
- char *scopeStr;
+ const char *scope_str[] = {"", " zone", " cluster", " node"};
struct publication *publ = sseq->zone_list;
tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +893,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
tipc_node(publ->node), publ->ref);
tipc_printf(buf, "%-26s ", portIdStr);
if (depth > 3) {
- if (publ->node != tipc_own_addr)
- scopeStr = "";
- else if (publ->scope == TIPC_NODE_SCOPE)
- scopeStr = "node";
- else if (publ->scope == TIPC_CLUSTER_SCOPE)
- scopeStr = "cluster";
- else
- scopeStr = "zone";
- tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
+ tipc_printf(buf, "%-10u %s", publ->key,
+ scope_str[publ->scope]);
}
publ = publ->zone_list_next;
@@ -951,24 +944,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
static void nametbl_header(struct print_buf *buf, u32 depth)
{
- tipc_printf(buf, "Type ");
-
- if (depth > 1)
- tipc_printf(buf, "Lower Upper ");
- if (depth > 2)
- tipc_printf(buf, "Port Identity ");
- if (depth > 3)
- tipc_printf(buf, "Publication");
-
- tipc_printf(buf, "\n-----------");
-
- if (depth > 1)
- tipc_printf(buf, "--------------------- ");
- if (depth > 2)
- tipc_printf(buf, "-------------------------- ");
- if (depth > 3)
- tipc_printf(buf, "------------------");
-
+ const char *header[] = {
+ "Type ",
+ "Lower Upper ",
+ "Port Identity ",
+ "Publication Scope"
+ };
+
+ int i;
+
+ if (depth > 4)
+ depth = 4;
+ for (i = 0; i < depth; i++)
+ tipc_printf(buf, header[i]);
tipc_printf(buf, "\n");
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba5..b702c7bf580f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -237,8 +237,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
int tipc_node_has_active_links(struct tipc_node *n_ptr)
{
- return (n_ptr &&
- ((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
+ return n_ptr->active_links[0] != NULL;
}
int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
@@ -384,6 +383,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
tipc_highest_allowed_slave);
}
+static void node_cleanup_finished(unsigned long node_addr)
+{
+ struct tipc_node *n_ptr;
+
+ read_lock_bh(&tipc_net_lock);
+ n_ptr = tipc_node_find(node_addr);
+ if (n_ptr) {
+ tipc_node_lock(n_ptr);
+ n_ptr->cleanup_required = 0;
+ tipc_node_unlock(n_ptr);
+ }
+ read_unlock_bh(&tipc_net_lock);
+}
+
static void node_lost_contact(struct tipc_node *n_ptr)
{
struct cluster *c_ptr;
@@ -458,6 +471,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
tipc_k_signal((Handler)ns->handle_node_down,
(unsigned long)ns->usr_handle);
}
+
+ /* Prevent re-contact with node until all cleanup is done */
+
+ n_ptr->cleanup_required = 1;
+ tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
}
/**
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d143..45f3db3a595d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
* @active_links: pointers to active links to node
* @links: pointers to all links to node
* @working_links: number of working links to node (both active and standby)
+ * @cleanup_required: non-zero if cleaning up after a prior loss of contact
* @link_cnt: number of links to node
* @permit_changeover: non-zero if node has redundant links to this system
* @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
struct link *links[MAX_BEARERS];
int link_cnt;
int working_links;
+ int cleanup_required;
int permit_changeover;
u32 routers[512/32];
int last_router;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e9266..ebcbc21d8f98 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -588,19 +588,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
if (!p_ptr) {
err = TIPC_ERR_NO_PORT;
} else if (p_ptr->publ.connected) {
- if (port_peernode(p_ptr) != msg_orignode(msg))
+ if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
+ (port_peerport(p_ptr) != msg_origport(msg))) {
err = TIPC_ERR_NO_PORT;
- if (port_peerport(p_ptr) != msg_origport(msg))
- err = TIPC_ERR_NO_PORT;
- if (!err && msg_routed(msg)) {
- u32 seqno = msg_transp_seqno(msg);
- u32 myno = ++p_ptr->last_in_seqno;
- if (seqno != myno) {
- err = TIPC_ERR_NO_PORT;
- abort_buf = port_build_self_abort_msg(p_ptr, err);
- }
- }
- if (msg_type(msg) == CONN_ACK) {
+ } else if (msg_type(msg) == CONN_ACK) {
int wakeup = tipc_port_congested(p_ptr) &&
p_ptr->publ.congested &&
p_ptr->wakeup;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48fd..f7ac94de24fe 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
struct sock sk;
struct tipc_port *p;
struct tipc_portid peer_name;
+ long conn_timeout;
};
#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
sock->state = state;
sock_init_data(sock, sk);
- sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
sk->sk_backlog_rcv = backlog_rcv;
tipc_sk(sk)->p = tp_ptr;
+ tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
spin_unlock_bh(tp_ptr->lock);
@@ -429,36 +430,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
* to handle any preventable race conditions, so TIPC will do the same ...
*
* TIPC sets the returned events as follows:
- * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
- * or if a connection-oriented socket is does not have an active connection
- * (i.e. a read operation will not block).
- * b) POLLOUT is set except when a socket's connection has been terminated
- * (i.e. a write operation will not block).
- * c) POLLHUP is set when a socket's connection has been terminated.
- *
- * IMPORTANT: The fact that a read or write operation will not block does NOT
- * imply that the operation will succeed!
+ *
+ * socket state flags set
+ * ------------ ---------
+ * unconnected no read flags
+ * no write flags
+ *
+ * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
+ * no write flags
+ *
+ * connected POLLIN/POLLRDNORM if data in rx queue
+ * POLLOUT if port is not congested
+ *
+ * disconnecting POLLIN/POLLRDNORM/POLLHUP
+ * no write flags
+ *
+ * listening POLLIN if SYN in rx queue
+ * no write flags
+ *
+ * ready POLLIN/POLLRDNORM if data in rx queue
+ * [connectionless] POLLOUT (since port cannot be congested)
+ *
+ * IMPORTANT: The fact that a read or write operation is indicated does NOT
+ * imply that the operation will succeed, merely that it should be performed
+ * and will not block.
*/
static unsigned int poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
- u32 mask;
+ u32 mask = 0;
poll_wait(file, sk_sleep(sk), wait);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sock->state == SS_UNCONNECTED) ||
- (sock->state == SS_DISCONNECTING))
- mask = (POLLRDNORM | POLLIN);
- else
- mask = 0;
-
- if (sock->state == SS_DISCONNECTING)
- mask |= POLLHUP;
- else
- mask |= POLLOUT;
+ switch ((int)sock->state) {
+ case SS_READY:
+ case SS_CONNECTED:
+ if (!tipc_sk_port(sk)->congested)
+ mask |= POLLOUT;
+ /* fall thru' */
+ case SS_CONNECTING:
+ case SS_LISTENING:
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ mask |= (POLLIN | POLLRDNORM);
+ break;
+ case SS_DISCONNECTING:
+ mask = (POLLIN | POLLRDNORM | POLLHUP);
+ break;
+ }
return mask;
}
@@ -1026,9 +1046,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
struct sk_buff *buf;
struct tipc_msg *msg;
unsigned int sz;
- int sz_to_copy;
+ int sz_to_copy, target, needed;
int sz_copied = 0;
- int needed;
char __user *crs = m->msg_iov->iov_base;
unsigned char *buf_crs;
u32 err;
@@ -1050,6 +1069,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
goto exit;
}
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
+
restart:
/* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1159,7 @@ restart:
if ((sz_copied < buf_len) && /* didn't get all requested data */
(!skb_queue_empty(&sk->sk_receive_queue) ||
- (flags & MSG_WAITALL)) && /* and more is ready or required */
+ (sz_copied < target)) && /* and more is ready or required */
(!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
(!err)) /* and haven't reached a FIN */
goto restart;
@@ -1365,6 +1386,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
struct msghdr m = {NULL,};
struct sk_buff *buf;
struct tipc_msg *msg;
+ long timeout;
int res;
lock_sock(sk);
@@ -1379,7 +1401,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
/* For now, TIPC does not support the non-blocking form of connect() */
if (flags & O_NONBLOCK) {
- res = -EWOULDBLOCK;
+ res = -EOPNOTSUPP;
goto exit;
}
@@ -1425,11 +1447,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
+ timeout = tipc_sk(sk)->conn_timeout;
release_sock(sk);
res = wait_event_interruptible_timeout(*sk_sleep(sk),
(!skb_queue_empty(&sk->sk_receive_queue) ||
(sock->state != SS_CONNECTING)),
- sk->sk_rcvtimeo);
+ timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
lock_sock(sk);
if (res > 0) {
@@ -1692,7 +1715,7 @@ static int setsockopt(struct socket *sock,
res = tipc_set_portunreturnable(tport->ref, value);
break;
case TIPC_CONN_TIMEOUT:
- sk->sk_rcvtimeo = msecs_to_jiffies(value);
+ tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
/* no need to set "res", since already 0 at this point */
break;
default:
@@ -1747,7 +1770,7 @@ static int getsockopt(struct socket *sock,
res = tipc_portunreturnable(tport->ref, &value);
break;
case TIPC_CONN_TIMEOUT:
- value = jiffies_to_msecs(sk->sk_rcvtimeo);
+ value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
/* no need to set "res", since already 0 at this point */
break;
case TIPC_NODE_RECVQ_DEPTH:
OpenPOWER on IntegriCloud