From dc8a82ad285dcd2831feb2fd8f7b41ce1f82e243 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:30:40 -0700 Subject: [IPV6]: Fix memory leak in cleanup_ipv6_mibs() The icmpv6msg mib statistics is not freed. This is almost not critical for current kernel, since ipv6 module is unloadable, but this can happen on load error and will happen every time we stop the network namespace (when we have one, of course). Signed-off-by: Pavel Emelyanov Acked-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bc929381fa46..1b1caf3aa1c1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -747,6 +747,7 @@ static void cleanup_ipv6_mibs(void) { snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); snmp_mib_free((void **)udplite_stats_in6); } -- cgit v1.2.1 From fd9e63544cac30a34c951f0ec958038f0529e244 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:43:37 -0700 Subject: [INET]: Omit double hash calculations in xxx_frag_intern Since the hash value is already calculated in xxx_find, we can simply use it later. This is already done in netfilter code, so make the same in ipv4 and ipv6. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6ad19cfc2025..0a1bf43bd489 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -233,16 +233,15 @@ out: /* Creation primitives. 
*/ -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) +static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, + unsigned int hash) { struct frag_queue *fq; - unsigned int hash; #ifdef CONFIG_SMP struct hlist_node *n; #endif write_lock(&ip6_frags.lock); - hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); #ifdef CONFIG_SMP hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { if (fq->id == fq_in->id && @@ -273,7 +272,7 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) static struct frag_queue * ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) + struct inet6_dev *idev, unsigned int hash) { struct frag_queue *fq; @@ -290,7 +289,7 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, spin_lock_init(&fq->q.lock); atomic_set(&fq->q.refcnt, 1); - return ip6_frag_intern(fq); + return ip6_frag_intern(fq, hash); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); @@ -318,7 +317,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, } read_unlock(&ip6_frags.lock); - return ip6_frag_create(id, src, dst, idev); + return ip6_frag_create(id, src, dst, idev, hash); } -- cgit v1.2.1 From 2588fe1d782f1686847493ad643157d5d10bf602 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:44:34 -0700 Subject: [INET]: Consolidate xxx_frag_intern This routine checks for the existence of a given entry in the hash table and inserts the new one if needed. The ->equal callback is used to compare two frag_queue-s together, but this one is temporary and will be removed later. The netfilter code and the ipv6 one use the same routine to compare frags. The inet_frag_intern() always returns non-NULL pointer, so convert the inet_frag_queue into protocol specific one (with the container_of) without any checks. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 34 +++--------------------- net/ipv6/reassembly.c | 46 ++++++++++++--------------------- 2 files changed, 20 insertions(+), 60 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 726fafd41961..d7dc444ec48f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -187,37 +187,10 @@ out: static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, struct nf_ct_frag6_queue *fq_in) { - struct nf_ct_frag6_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&nf_frags.lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&nf_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); - return fq; - } - } -#endif - fq = fq_in; + struct inet_frag_queue *q; - if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); - - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &nf_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &nf_frags.lru_list); - nf_frags.nqueues++; - write_unlock(&nf_frags.lock); - return fq; + q = inet_frag_intern(&fq_in->q, &nf_frags, hash); + return container_of(q, struct nf_ct_frag6_queue, q); } @@ -752,6 +725,7 @@ int nf_ct_frag6_init(void) nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.equal = ip6_frag_equal; inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0a1bf43bd489..73ea204eaa6f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -143,6 +143,18 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) 
return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } +int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) +{ + struct frag_queue *fq1, *fq2; + + fq1 = container_of(q1, struct frag_queue, q); + fq2 = container_of(q2, struct frag_queue, q); + return (fq1->id == fq2->id && + ipv6_addr_equal(&fq2->saddr, &fq1->saddr) && + ipv6_addr_equal(&fq2->daddr, &fq1->daddr)); +} +EXPORT_SYMBOL(ip6_frag_equal); + /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -236,37 +248,10 @@ out: static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, unsigned int hash) { - struct frag_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&ip6_frags.lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&ip6_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); + struct inet_frag_queue *q; - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list); - ip6_frags.nqueues++; - write_unlock(&ip6_frags.lock); - return fq; + q = inet_frag_intern(&fq_in->q, &ip6_frags, hash); + return container_of(q, struct frag_queue, q); } @@ -699,5 +684,6 @@ void __init ipv6_frag_init(void) ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.equal = ip6_frag_equal; inet_frags_init(&ip6_frags); } -- cgit v1.2.1 From e521db9d790aaa60ae8920e21cb7faedc280fc36 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:45:23 -0700 Subject: [INET]: Consolidate 
xxx_frag_alloc() Just perform the kzalloc() allocation and set up common fields in the inet_frag_queue(). Then return the result to the caller to initialize the rest. The inet_frag_alloc() may return NULL, so check the return value before doing the container_of(). This looks ugly, but the xxx_frag_alloc() will be removed soon. The xxx_expire() timer callbacks are patched, because the argument is now the inet_frag_queue, not the protocol specific queue. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++----------- net/ipv6/reassembly.c | 19 +++++++------------ 2 files changed, 15 insertions(+), 23 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d7dc444ec48f..3f8c16b3301e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -137,13 +137,10 @@ static void nf_frag_free(struct inet_frag_queue *q) static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) { - struct nf_ct_frag6_queue *fq; + struct inet_frag_queue *q; - fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - if (fq == NULL) - return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem); - return fq; + q = inet_frag_alloc(&nf_frags); + return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; } /* Destruction primitives. 
*/ @@ -168,7 +165,10 @@ static void nf_ct_frag6_evictor(void) static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + struct nf_ct_frag6_queue *fq; + + fq = container_of((struct inet_frag_queue *)data, + struct nf_ct_frag6_queue, q); spin_lock(&fq->q.lock); @@ -208,10 +208,6 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return nf_ct_frag6_intern(hash, fq); oom: @@ -726,6 +722,7 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.equal = ip6_frag_equal; + nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 73ea204eaa6f..21913c78f053 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -171,12 +171,10 @@ static void ip6_frag_free(struct inet_frag_queue *fq) static inline struct frag_queue *frag_alloc_queue(void) { - struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC); + struct inet_frag_queue *q; - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frags.mem); - return fq; + q = inet_frag_alloc(&ip6_frags); + return q ? container_of(q, struct frag_queue, q) : NULL; } /* Destruction primitives. 
*/ @@ -205,9 +203,11 @@ static void ip6_evictor(struct inet6_dev *idev) static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq = (struct frag_queue *) data; + struct frag_queue *fq; struct net_device *dev = NULL; + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + spin_lock(&fq->q.lock); if (fq->q.last_in & COMPLETE) @@ -268,12 +268,6 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - init_timer(&fq->q.timer); - fq->q.timer.function = ip6_frag_expire; - fq->q.timer.data = (long) fq; - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); - return ip6_frag_intern(fq, hash); oom: @@ -685,5 +679,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.equal = ip6_frag_equal; + ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.1 From c6fda282294da882f8d8cc4c513940277dd380f5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:46:47 -0700 Subject: [INET]: Consolidate xxx_frag_create() This one uses the xxx_frag_intern() and xxx_frag_alloc() routines, which are already consolidated, so remove them from protocol code (as promised). The ->constructor callback is used to init the rest of the frag queue and it is the same for netfilter and ipv6. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 39 +++++++++------------------- net/ipv6/reassembly.c | 45 +++++++++++++++------------------ 2 files changed, 32 insertions(+), 52 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3f8c16b3301e..127d1d842786 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -135,14 +135,6 @@ static void nf_frag_free(struct inet_frag_queue *q) kfree(container_of(q, struct nf_ct_frag6_queue, q)); } -static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) -{ - struct inet_frag_queue *q; - - q = inet_frag_alloc(&nf_frags); - return q ? container_of(q, struct nf_ct_frag6_queue, q) : NULL; -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -184,33 +176,25 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, - struct nf_ct_frag6_queue *fq_in) +static struct nf_ct_frag6_queue * +nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; + struct ip6_create_arg arg; - q = inet_frag_intern(&fq_in->q, &nf_frags, hash); - return container_of(q, struct nf_ct_frag6_queue, q); -} - - -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; + arg.id = id; + arg.src = src; + arg.dst = dst; - if ((fq = frag_alloc_queue()) == NULL) { - pr_debug("Can't alloc new queue\n"); + q = inet_frag_create(&nf_frags, &arg, hash); + if (q == NULL) goto oom; - } - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - return nf_ct_frag6_intern(hash, fq); + return container_of(q, struct nf_ct_frag6_queue, q); oom: + pr_debug("Can't alloc new queue\n"); return NULL; } @@ -718,6 +702,7 @@ int nf_ct_frag6_init(void) { 
nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 21913c78f053..ce8734028d94 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -164,17 +164,20 @@ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) kfree_skb(skb); } -static void ip6_frag_free(struct inet_frag_queue *fq) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - kfree(container_of(fq, struct frag_queue, q)); + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; + + fq->id = arg->id; + ipv6_addr_copy(&fq->saddr, arg->src); + ipv6_addr_copy(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_init); -static inline struct frag_queue *frag_alloc_queue(void) +static void ip6_frag_free(struct inet_frag_queue *fq) { - struct inet_frag_queue *q; - - q = inet_frag_alloc(&ip6_frags); - return q ? container_of(q, struct frag_queue, q) : NULL; + kfree(container_of(fq, struct frag_queue, q)); } /* Destruction primitives. */ @@ -244,31 +247,22 @@ out: /* Creation primitives. 
*/ - -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in, - unsigned int hash) -{ - struct inet_frag_queue *q; - - q = inet_frag_intern(&fq_in->q, &ip6_frags, hash); - return container_of(q, struct frag_queue, q); -} - - static struct frag_queue * ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev, unsigned int hash) { - struct frag_queue *fq; + struct inet_frag_queue *q; + struct ip6_create_arg arg; - if ((fq = frag_alloc_queue()) == NULL) - goto oom; + arg.id = id; + arg.src = src; + arg.dst = dst; - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); + q = inet_frag_create(&ip6_frags, &arg, hash); + if (q == NULL) + goto oom; - return ip6_frag_intern(fq, hash); + return container_of(q, struct frag_queue, q); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); @@ -675,6 +669,7 @@ void __init ipv6_frag_init(void) ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); -- cgit v1.2.1 From abd6523d15f40bfee14652619a31a7f65f77f581 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:21 -0700 Subject: [INET]: Consolidate xxx_find() in fragment management Here we need another callback ->match to check whether the entry found in hash matches the key passed. The key used is the same as the creation argument for inet_frag_create. Yet again, this ->match is the same for netfilter and ipv6. Running a few steps forward - this callback will later replace the ->equal one. Since the inet_frag_find() uses the already consolidated inet_frag_create() remove the xxx_frag_create from protocol codes. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++++----------------- net/ipv6/reassembly.c | 50 +++++++++++++-------------------- 2 files changed, 25 insertions(+), 57 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 127d1d842786..bff63d79c644 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -176,18 +176,19 @@ out: /* Creation primitives. */ -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, - struct in6_addr *dst) +static __inline__ struct nf_ct_frag6_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_frags, &arg, hash); if (q == NULL) goto oom; @@ -198,28 +199,6 @@ oom: return NULL; } -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&nf_frags.lock); - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&nf_frags.lock); - return fq; - } - } - read_unlock(&nf_frags.lock); - - return nf_ct_frag6_create(hash, id, src, dst); -} - static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) @@ -706,6 +685,7 @@ int nf_ct_frag6_init(void) nf_frags.destructor = nf_frag_free; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.match = ip6_frag_match; nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = 
nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ce8734028d94..11fffe791fc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -155,6 +155,18 @@ int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) } EXPORT_SYMBOL(ip6_frag_equal); +int ip6_frag_match(struct inet_frag_queue *q, void *a) +{ + struct frag_queue *fq; + struct ip6_create_arg *arg = a; + + fq = container_of(q, struct frag_queue, q); + return (fq->id == arg->id && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst)); +} +EXPORT_SYMBOL(ip6_frag_match); + /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { @@ -245,20 +257,20 @@ out: fq_put(fq); } -/* Creation primitives. */ - -static struct frag_queue * -ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev, unsigned int hash) +static __inline__ struct frag_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { struct inet_frag_queue *q; struct ip6_create_arg arg; + unsigned int hash; arg.id = id; arg.src = src; arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - q = inet_frag_create(&ip6_frags, &arg, hash); + q = inet_frag_find(&ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -269,31 +281,6 @@ oom: return NULL; } -static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash; - - read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&ip6_frags.lock); - return fq; - } - } - read_unlock(&ip6_frags.lock); - - return ip6_frag_create(id, src, dst, 
idev, hash); -} - - static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { @@ -673,6 +660,7 @@ void __init ipv6_frag_init(void) ip6_frags.destructor = ip6_frag_free; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); -- cgit v1.2.1 From 48d60056387c37a17a46feda48613587a90535e5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:47:56 -0700 Subject: [INET]: Remove no longer needed ->equal callback Since this callback is used to check for conflicts in hashtable when inserting a newly created frag queue, we can do the same by checking for matching the queue with the argument, used to create one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 13 ------------- 2 files changed, 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index bff63d79c644..25746d31504d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -686,7 +686,6 @@ int nf_ct_frag6_init(void) nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; - nf_frags.equal = ip6_frag_equal; nf_frags.frag_expire = nf_ct_frag6_expire; inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 11fffe791fc4..01766bc75b6a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -143,18 +143,6 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q) return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); } -int ip6_frag_equal(struct inet_frag_queue *q1, struct inet_frag_queue *q2) -{ - struct frag_queue *fq1, *fq2; - - fq1 = container_of(q1, struct frag_queue, q); - fq2 = 
container_of(q2, struct frag_queue, q); - return (fq1->id == fq2->id && - ipv6_addr_equal(&fq2->saddr, &fq1->saddr) && - ipv6_addr_equal(&fq2->daddr, &fq1->daddr)); -} -EXPORT_SYMBOL(ip6_frag_equal); - int ip6_frag_match(struct inet_frag_queue *q, void *a) { struct frag_queue *fq; @@ -661,7 +649,6 @@ void __init ipv6_frag_init(void) ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; - ip6_frags.equal = ip6_frag_equal; ip6_frags.frag_expire = ip6_frag_expire; inet_frags_init(&ip6_frags); } -- cgit v1.2.1 From c95477090a2ace6d241c184adc3fbfcab9c61ceb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 19:48:26 -0700 Subject: [INET]: Consolidate frag queues freeing Since we now allocate the queues in inet_fragment.c, we can safely free it in the same place. The ->destructor callback thus becomes optional for inet_frags. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 7 +------ net/ipv6/reassembly.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 25746d31504d..e170c67c47a5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -130,11 +130,6 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) kfree_skb(skb); } -static void nf_frag_free(struct inet_frag_queue *q) -{ - kfree(container_of(q, struct nf_ct_frag6_queue, q)); -} - /* Destruction primitives. 
*/ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -682,7 +677,7 @@ int nf_ct_frag6_init(void) nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; - nf_frags.destructor = nf_frag_free; + nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 01766bc75b6a..76c88a93b9b5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -175,11 +175,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -static void ip6_frag_free(struct inet_frag_queue *fq) -{ - kfree(container_of(fq, struct frag_queue, q)); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct frag_queue *fq) @@ -645,7 +640,7 @@ void __init ipv6_frag_init(void) ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; - ip6_frags.destructor = ip6_frag_free; + ip6_frags.destructor = NULL; ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; -- cgit v1.2.1 From 16910b9829797cda4032fbc84e5292ac7b4474f7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:23:43 -0700 Subject: [IPV6]: Fix return type for snmp6_free_dev() This call is essentially void. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 52d10d213217..edf06ca34749 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -283,12 +283,11 @@ err_ip: return err; } -static int snmp6_free_dev(struct inet6_dev *idev) +static void snmp6_free_dev(struct inet6_dev *idev) { snmp_mib_free((void **)idev->stats.icmpv6msg); snmp_mib_free((void **)idev->stats.icmpv6); snmp_mib_free((void **)idev->stats.ipv6); - return 0; } /* Nobody refers to this device, we may destroy it. */ -- cgit v1.2.1 From aaf70ec7fde2321281b2a49c7c9f881c90d0d208 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:25:32 -0700 Subject: [IPV6]: Cleanup snmp6_alloc_dev() This function is never called with NULL or not setup argument, so the checks inside are redundant. Also, the return value is always -ENOMEM, so no need in additional variable for this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index edf06ca34749..348bd8d06112 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -255,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - int err = -ENOMEM; - - if (!idev || !idev->dev) - return -EINVAL; - if (snmp_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) @@ -280,7 +275,7 @@ err_icmpmsg: err_icmp: snmp_mib_free((void **)idev->stats.ipv6); err_ip: - return err; + return -ENOMEM; } static void snmp6_free_dev(struct inet6_dev *idev) -- cgit v1.2.1 From 04663d0b8b3c8ce3804106279420cfe5bdfcce3c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:28:06 -0700 Subject: [IPSEC]: Fix pure tunnel modes involving IPv6 I noticed that my recent patch broke 6-on-4 pure IPsec tunnels (the ones that are only used for incompressible IPsec packets). Subsequent reviews show that I broke 6-on-6 pure tunnels more than three years ago and nobody ever noticed. I suppose everyone must be testing 6-on-6 IPComp with large pings which are very compressible :) This patch fixes both cases. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 3f8a3abde67e..6c67ac197ee0 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -248,7 +248,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return skb_network_header(skb)[IP6CB(skb)->nhoff]; } static int xfrm6_tunnel_rcv(struct sk_buff *skb) -- cgit v1.2.1 From 33b5ecb8f64706d1ed472dcb44162ab3a7345724 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:29:25 -0700 Subject: [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi Currently xfrm6_rcv_spi gets the nexthdr value itself from the packet. This means that we need to fix up the value in case we have a 4-on-6 tunnel. Moving this logic into the caller simplifies things and allows us to merge the code with IPv4. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_input.c | 9 ++++----- net/ipv6/xfrm6_tunnel.c | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 02f69e544f6f..596a730294ec 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include #include -int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; __be32 seq; @@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int nexthdr; unsigned int nhoff; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) @@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6); + nexthdr, AF_INET6); if (x == NULL) goto drop; spin_lock(&x->lock); @@ -135,7 +133,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(skb, 0); + return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6c67ac197ee0..fae90ff31087 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? 
: 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.2.1 From 7aa68cb90638ccc36559a936814e4c089892b3d9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:30:07 -0700 Subject: [IPSEC]: Move ip_summed zapping out of xfrm6_rcv_spi Not every transform needs to zap ip_summed. For example, a pure tunnel mode encapsulation does not affect the hardware checksum at all. In fact, every algorithm (that needs this) other than AH6 already does its own ip_summed zapping. This patch moves the zapping into AH6 which is in line with what IPv4 does. Possible future optimisation: Checksum the data as we copy them in IPComp. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 2 ++ net/ipv6/xfrm6_input.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f9f689162692..a8221d1da0ff 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -344,6 +344,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto out; + skb->ip_summed = CHECKSUM_NONE; + hdr_len = skb->data - skb_network_header(skb); ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 596a730294ec..b1201c33eb12 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -97,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, xfrm_nr * sizeof(xfrm_vec[0])); skb->sp->len += xfrm_nr; - skb->ip_summed = CHECKSUM_NONE; nf_reset(skb); -- cgit v1.2.1 From 1bfcb10f670f5ff5e1d9f53e59680573524cb142 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:31:50 -0700 Subject: [IPSEC]: Add missing BEET checks Currently BEET mode does not reinject the packet back into the stack like tunnel mode does. Since BEET should behave just like tunnel mode this is incorrect. 
This patch fixes this by introducing a flags field to xfrm_mode that tells the IPsec code whether it should terminate and reinject the packet back into the stack. It then sets the flag for BEET and tunnel mode. I've also added a number of missing BEET checks elsewhere where we check whether a given mode is a tunnel or not. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 1 + net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 3 +-- net/ipv6/xfrm6_state.c | 6 ++++-- 6 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b1201c33eb12..c6ee1a3ba19a 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -71,7 +71,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 13bb1e856764..2bfb4f05c14c 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = { .output = xfrm6_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_beet_init(void) diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index ea2283879112..fd84e2217274 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = { .output = xfrm6_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index a5a32c17249d..c9f42d1c2dff 100644 --- a/net/ipv6/xfrm6_output.c +++ 
b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 15aa4c58c315..dc4bdcb55cbe 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL || - xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index cdadb4847469..e644c80515fc 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { + (src[i]->props.mode == XFRM_MODE_TUNNEL || + src[i]->props.mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { + (src[i]->mode == XFRM_MODE_TUNNEL || + src[i]->mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } -- cgit v1.2.1 From 17c2a42a24e1e8dd6aa7cea4f84e034ab1bfff31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:33:12 -0700 Subject: [IPSEC]: Store afinfo pointer in xfrm_mode It is convenient to have a pointer from xfrm_state to address-specific functions such as the 
output function for a family. Currently the address-specific policy code calls out to the xfrm state code to get those pointers when we could get it in an easier way via the state itself. This patch adds an xfrm_state_afinfo to xfrm_mode (since they're address-specific) and changes the policy code to use it. I've also added an owner field to do reference counting on the module providing the afinfo even though it isn't strictly necessary today since IPv6 can't be unloaded yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 14 +------------- net/ipv6/xfrm6_state.c | 1 + 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dc4bdcb55cbe..324268329f69 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -231,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv4 is implemented as module and can be unloaded, - * we should manage reference to xfrm4_output in afinfo->output. - * Miyazawa - */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - goto error; - } - - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->mode->afinfo->output; /* Sheit... I remember I did this right. 
Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index e644c80515fc..b392bee396f1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -170,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, -- cgit v1.2.1 From ca68145f16359f71cd62b2671aa3e8c58f45ef19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:15 -0700 Subject: [IPSEC]: Disallow combinations of RO and AH/ESP/IPCOMP Combining RO and AH/ESP/IPCOMP does not make sense. So this patch adds a check in the state initialisation function to prevent this. This allows us to safely remove the mode input function of RO since it can never be called anymore. Indeed, if somehow it does get called we'll know about it through an OOPS instead of it slipping past silently. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/ah6.c | 9 ++++++++- net/ipv6/esp6.c | 9 ++++++++- net/ipv6/ipcomp6.c | 9 ++++++++- net/ipv6/xfrm6_mode_ro.c | 9 --------- 4 files changed, 24 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index a8221d1da0ff..67cd06613a25 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -477,8 +477,15 @@ static int ah6_init_state(struct xfrm_state *x) x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } x->data = ahp; return 0; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9eb928598351..b0715432e454 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,8 +354,15 @@ static int esp6_init_state(struct xfrm_state *x) (x->ealg->alg_key_len + 7) / 8)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 28fc8edfdc3a..80ef2a1d39fd 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -411,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 957ae36b6695..a7bc8c62317a 100644 --- 
a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -58,16 +58,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -/* - * Do nothing about routing optimization header unlike IPsec. - */ -static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return 0; -} - static struct xfrm_mode xfrm6_ro_mode = { - .input = xfrm6_ro_input, .output = xfrm6_ro_output, .owner = THIS_MODULE, .encap = XFRM_MODE_ROUTEOPTIMIZATION, -- cgit v1.2.1 From 13996378e6585fb25e582afe7489bf52dde78deb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:51 -0700 Subject: [IPSEC]: Rename mode to outer_mode and add inner_mode This patch adds a new field to xfrm states called inner_mode. The existing mode object is renamed to outer_mode. This is the first part of an attempt to fix inter-family transforms. As it is we always use the outer family when determining which mode to use. As a result we may end up shoving IPv4 packets into netfilter6 and vice versa. What we really want is to use the inner family for the first part of outbound processing and the outer family for the second part. For inbound processing we'd use the opposite pairing. I've also added a check to prevent silly combinations such as transport mode with inter-family transforms. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/xfrm6_input.c | 4 ++-- net/ipv6/xfrm6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c6ee1a3ba19a..515783707e86 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -68,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c9f42d1c2dff..656976760ad4 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 324268329f69..82e27b80d07d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -230,7 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->mode->afinfo->output; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. 
Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); -- cgit v1.2.1 From 04028045a12ba941c579d0f3238489333ac18ea4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:14:58 -0700 Subject: [IPV6]: Lost locking when inserting a flowlabel in ipv6_fl_list The new flowlabels should be inserted into the sock list under the ip6_sk_fl_lock. This was lost in one place. This list is naturally protected with the socket lock, but the fl6_sock_lookup() is called without it, so another protection is required. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 217d60f9fc80..8550df20f984 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -409,6 +409,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) return 0; } +static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) +{ + write_lock_bh(&ip6_sk_fl_lock); + sfl->fl = fl; + sfl->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl; + write_unlock_bh(&ip6_sk_fl_lock); +} + int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int err; @@ -513,11 +523,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - write_lock_bh(&ip6_sk_fl_lock); - sfl1->fl = fl1; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; - write_unlock_bh(&ip6_sk_fl_lock); + fl_link(np, sfl1, fl1); fl_free(fl); return 0; @@ -545,9 +551,7 @@ release: } } - sfl1->fl = fl; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; + fl_link(np, sfl1, fl); return 0; default: -- cgit v1.2.1 From 
bd0bf57700cb0eaa92f3d2ee040a69743cdd99d0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:15:57 -0700 Subject: [IPV6]: Lost locking in fl6_sock_lookup This routine scans the ipv6_fl_list whose update is protected with the socket lock and the ip6_sk_fl_lock. Since the socket lock is not taken in the lookup, use the other one. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 8550df20f984..f40a08669db0 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -190,14 +190,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; + read_lock_bh(&ip6_sk_fl_lock); for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { + read_unlock_bh(&ip6_sk_fl_lock); fl->lastuse = jiffies; atomic_inc(&fl->users); return fl; } } + read_unlock_bh(&ip6_sk_fl_lock); return NULL; } -- cgit v1.2.1 From 78c2e50253569e62caa4a61fc1cc5a0158edec43 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:18:56 -0700 Subject: [IPV6]: Fix race in ipv6_flowlabel_opt() when inserting two labels In the IPV6_FL_A_GET case the hash is checked for flowlabels with the given label. If it is not found, the lock protecting the hash is dropped and then re-acquired for writing. After this a newly allocated entry is inserted, but no checks are performed to catch a classical SMP race, when the conflicting label may be inserted on another cpu. Use the (currently unused) return value from fl_intern() to return the conflicting entry (if found) and re-check whether we can reuse it (IPV6_FL_F_EXCL) or must return -EEXIST. Also add a comment explaining why the current sock is not searched again for a conflicting flowlabel entry. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/ipv6/ip6_flowlabel.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f40a08669db0..e55ae1a1f560 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -154,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static int fl_intern(struct ip6_flowlabel *fl, __be32 label) +static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) { + struct ip6_flowlabel *lfl; + fl->label = label & IPV6_FLOWLABEL_MASK; write_lock_bh(&ip6_fl_lock); @@ -163,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { - struct ip6_flowlabel *lfl; lfl = __fl_lookup(fl->label); if (lfl == NULL) break; } } + } else { + /* + * we dropper the ip6_fl_lock, so this entry could reappear + * and we need to recheck with it. 
+ * + * OTOH no need to search the active socket first, like it is + * done in ipv6_flowlabel_opt - sock is locked, so new entry + * with the same label can only appear on another sock + */ + lfl = __fl_lookup(fl->label); + if (lfl != NULL) { + atomic_inc(&lfl->users); + write_unlock_bh(&ip6_fl_lock); + return lfl; + } } fl->lastuse = jiffies; @@ -176,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) fl_ht[FL_HASH(fl->label)] = fl; atomic_inc(&fl_size); write_unlock_bh(&ip6_fl_lock); - return 0; + return NULL; } @@ -429,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; struct ipv6_fl_socklist *sfl, **sflp; - struct ip6_flowlabel *fl; + struct ip6_flowlabel *fl, *fl1 = NULL; + if (optlen < sizeof(freq)) return -EINVAL; @@ -485,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); if (freq.flr_label) { - struct ip6_flowlabel *fl1 = NULL; - err = -EEXIST; read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { @@ -505,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (fl1 == NULL) fl1 = fl_lookup(freq.flr_label); if (fl1) { +recheck: err = -EEXIST; if (freq.flr_flags&IPV6_FL_F_EXCL) goto release; @@ -543,9 +559,9 @@ release: if (sfl1 == NULL || (err = mem_check(sk)) != 0) goto done; - err = fl_intern(fl, freq.flr_label); - if (err) - goto done; + fl1 = fl_intern(fl, freq.flr_label); + if (fl1 != NULL) + goto recheck; if (!freq.flr_label) { if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, -- cgit v1.2.1 From 52f095ee88d8851866bc7694ab991ca5abf21d5e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 05:38:48 -0700 Subject: [IPV6]: Fix again the fl6_sock_lookup() fixed locking YOSHIFUJI fairly pointed out, that the users increment should be done under the 
ip6_sk_fl_lock, so that IPV6_FL_A_PUT has no chance to drop this count to zero and release the flowlabel. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index e55ae1a1f560..b12cc22e7745 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -210,9 +210,9 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { - read_unlock_bh(&ip6_sk_fl_lock); fl->lastuse = jiffies; atomic_inc(&fl->users); + read_unlock_bh(&ip6_sk_fl_lock); return fl; } } -- cgit v1.2.1