From 84018f55ab883f03d41ec3c9ac7f0cc80830b20f Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Mon, 23 Apr 2012 03:35:26 +0000
Subject: netfilter: ip6_tables: add flags parameter to ipv6_find_hdr()

This patch adds the flags parameter to ipv6_find_hdr. This flags
allows us to:

* know if this is a fragment.
* stop at the AH header, so the information contained in that header
  can be used for some specific packet handling.

This patch also adds the offset parameter for inspection of one
inner IPv6 header that is contained in error messages.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6/ip6_tables.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 1bc898b14a80..08c2cbbaa32b 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -298,9 +298,14 @@ ip6t_ext_hdr(u8 nexthdr)
 	       (nexthdr == IPPROTO_DSTOPTS);
 }
 
+enum {
+	IP6T_FH_F_FRAG	= (1 << 0),
+	IP6T_FH_F_AUTH	= (1 << 1),
+};
+
 /* find specified header and get offset to it */
 extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
-			 int target, unsigned short *fragoff);
+			 int target, unsigned short *fragoff, int *fragflg);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
-- 
cgit v1.2.3


From cf308a1fae432f315989e2da6878bfaa3daa22b1 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Wed, 2 May 2012 07:49:47 +0000
Subject: netfilter: add xt_hmark target for hash-based skb marking

The target allows you to create rules in the "raw" and "mangle" tables
which set the skbuff mark by means of hash calculation within a given
range. The nfmark can influence the routing method (see "Use netfilter
MARK value as routing key") and can also be used by other subsystems to
change their behaviour.

[ Part of this patch has been refactorized and modified by Pablo Neira Ayuso ]

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_HMARK.h |  45 +++++
 net/netfilter/Kconfig              |  15 ++
 net/netfilter/Makefile             |   1 +
 net/netfilter/xt_HMARK.c           | 362 +++++++++++++++++++++++++++++++++++++
 4 files changed, 423 insertions(+)
 create mode 100644 include/linux/netfilter/xt_HMARK.h
 create mode 100644 net/netfilter/xt_HMARK.c

(limited to 'include')

diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
new file mode 100644
index 000000000000..abb1650940d2
--- /dev/null
+++ b/include/linux/netfilter/xt_HMARK.h
@@ -0,0 +1,45 @@
+#ifndef XT_HMARK_H_
+#define XT_HMARK_H_
+
+#include <linux/types.h>
+
+enum {
+	XT_HMARK_SADDR_MASK,
+	XT_HMARK_DADDR_MASK,
+	XT_HMARK_SPI,
+	XT_HMARK_SPI_MASK,
+	XT_HMARK_SPORT,
+	XT_HMARK_DPORT,
+	XT_HMARK_SPORT_MASK,
+	XT_HMARK_DPORT_MASK,
+	XT_HMARK_PROTO_MASK,
+	XT_HMARK_RND,
+	XT_HMARK_MODULUS,
+	XT_HMARK_OFFSET,
+	XT_HMARK_CT,
+	XT_HMARK_METHOD_L3,
+	XT_HMARK_METHOD_L3_4,
+};
+#define XT_HMARK_FLAG(flag)	(1 << flag)
+
+union hmark_ports {
+	struct {
+		__u16	src;
+		__u16	dst;
+	} p16;
+	__u32	v32;
+};
+
+struct xt_hmark_info {
+	union nf_inet_addr	src_mask;
+	union nf_inet_addr	dst_mask;
+	union hmark_ports	port_mask;
+	union hmark_ports	port_set;
+	__u32			flags;
+	__u16			proto_mask;
+	__u32			hashrnd;
+	__u32			hmodulus;
+	__u32			hoffset;	/* Mark offset to start from */
+};
+
+#endif /* XT_HMARK_H_ */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0c6f67e8f2e5..209c1ed43368 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -509,6 +509,21 @@ config NETFILTER_XT_TARGET_HL
 	since you can easily create immortal packets that loop
 	forever on the network.
 
+config NETFILTER_XT_TARGET_HMARK
+	tristate '"HMARK" target support'
+	depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds the "HMARK" target.
+
+	The target allows you to create rules in the "raw" and "mangle" tables
+	which set the skbuff mark by means of hash calculation within a given
+	range. The nfmark can influence the routing method (see "Use netfilter
+	MARK value as routing key") and can also be used by other subsystems to
+	change their behaviour.
+
+	To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_IDLETIMER
 	tristate  "IDLETIMER target support"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ca3676586f51..4e7960cc7b97 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
new file mode 100644
index 000000000000..32fbd735d02b
--- /dev/null
+++ b/net/netfilter/xt_HMARK.c
@@ -0,0 +1,362 @@
+/*
+ * xt_HMARK - Netfilter module to set mark by means of hashing
+ *
+ * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_HMARK.h>
+
+#include <net/ip.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+#include <net/ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
+MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
+MODULE_ALIAS("ipt_HMARK");
+MODULE_ALIAS("ip6t_HMARK");
+
+struct hmark_tuple {
+	u32			src;
+	u32			dst;
+	union hmark_ports	uports;
+	uint8_t			proto;
+};
+
+static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
+{
+	return (addr32[0] & mask[0]) ^
+	       (addr32[1] & mask[1]) ^
+	       (addr32[2] & mask[2]) ^
+	       (addr32[3] & mask[3]);
+}
+
+static inline u32
+hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
+{
+	switch (l3num) {
+	case AF_INET:
+		return *addr32 & *mask;
+	case AF_INET6:
+		return hmark_addr6_mask(addr32, mask);
+	}
+	return 0;
+}
+
+static int
+hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
+		    const struct xt_hmark_info *info)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conntrack_tuple *otuple;
+	struct nf_conntrack_tuple *rtuple;
+
+	if (ct == NULL || nf_ct_is_untracked(ct))
+		return -1;
+
+	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
+				 info->src_mask.all);
+	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
+				 info->dst_mask.all);
+
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+		return 0;
+
+	t->proto = nf_ct_protonum(ct);
+	if (t->proto != IPPROTO_ICMP) {
+		t->uports.p16.src = otuple->src.u.all;
+		t->uports.p16.dst = rtuple->src.u.all;
+		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
+				info->port_set.v32;
+		if (t->uports.p16.dst < t->uports.p16.src)
+			swap(t->uports.p16.dst, t->uports.p16.src);
+	}
+
+	return 0;
+#else
+	return -1;
+#endif
+}
+
+static inline u32
+hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
+{
+	u32 hash;
+
+	if (t->dst < t->src)
+		swap(t->src, t->dst);
+
+	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
+	hash = hash ^ (t->proto & info->proto_mask);
+
+	return (hash % info->hmodulus) + info->hoffset;
+}
+
+static void
+hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
+		      struct hmark_tuple *t, const struct xt_hmark_info *info)
+{
+	int protoff;
+
+	protoff = proto_ports_offset(t->proto);
+	if (protoff < 0)
+		return;
+
+	nhoff += protoff;
+	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
+		return;
+
+	t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
+			info->port_set.v32;
+
+	if (t->uports.p16.dst < t->uports.p16.src)
+		swap(t->uports.p16.dst, t->uports.p16.src);
+}
+
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
+{
+	struct icmp6hdr *icmp6h, _ih6;
+
+	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
+	if (icmp6h == NULL)
+		return 0;
+
+	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
+		*offset += sizeof(struct icmp6hdr);
+		return 1;
+	}
+	return 0;
+}
+
+static int
+hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
+			  const struct xt_hmark_info *info)
+{
+	struct ipv6hdr *ip6, _ip6;
+	int flag = IP6T_FH_F_AUTH;
+	unsigned int nhoff = 0;
+	u16 fragoff = 0;
+	int nexthdr;
+
+	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
+	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
+	if (nexthdr < 0)
+		return 0;
+	/* No need to check for icmp errors on fragments */
+	if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
+		goto noicmp;
+	/* Use inner header in case of ICMP errors */
+	if (get_inner6_hdr(skb, &nhoff)) {
+		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
+		if (ip6 == NULL)
+			return -1;
+		/* If AH present, use SPI like in ESP. */
+		flag = IP6T_FH_F_AUTH;
+		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
+		if (nexthdr < 0)
+			return -1;
+	}
+noicmp:
+	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all);
+	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all);
+
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+		return 0;
+
+	t->proto = nexthdr;
+	if (t->proto == IPPROTO_ICMPV6)
+		return 0;
+
+	if (flag & IP6T_FH_F_FRAG)
+		return 0;
+
+	hmark_set_tuple_ports(skb, nhoff, t, info);
+	return 0;
+}
+
+static unsigned int
+hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_hmark_info *info = par->targinfo;
+	struct hmark_tuple t;
+
+	memset(&t, 0, sizeof(struct hmark_tuple));
+
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
+		if (hmark_ct_set_htuple(skb, &t, info) < 0)
+			return XT_CONTINUE;
+	} else {
+		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
+			return XT_CONTINUE;
+	}
+
+	skb->mark = hmark_hash(&t, info);
+	return XT_CONTINUE;
+}
+#endif
+
+static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
+{
+	const struct icmphdr *icmph;
+	struct icmphdr _ih;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
+	if (icmph == NULL && icmph->type > NR_ICMP_TYPES)
+		return 0;
+
+	/* Error message? */
+	if (icmph->type != ICMP_DEST_UNREACH &&
+	    icmph->type != ICMP_SOURCE_QUENCH &&
+	    icmph->type != ICMP_TIME_EXCEEDED &&
+	    icmph->type != ICMP_PARAMETERPROB &&
+	    icmph->type != ICMP_REDIRECT)
+		return 0;
+
+	*nhoff += iphsz + sizeof(_ih);
+	return 1;
+}
+
+static int
+hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
+			  const struct xt_hmark_info *info)
+{
+	struct iphdr *ip, _ip;
+	int nhoff = skb_network_offset(skb);
+
+	ip = (struct iphdr *) (skb->data + nhoff);
+	if (ip->protocol == IPPROTO_ICMP) {
+		/* Use inner header in case of ICMP errors */
+		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
+			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
+			if (ip == NULL)
+				return -1;
+		}
+	}
+
+	t->src = (__force u32) ip->saddr;
+	t->dst = (__force u32) ip->daddr;
+
+	t->src &= info->src_mask.ip;
+	t->dst &= info->dst_mask.ip;
+
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
+		return 0;
+
+	t->proto = ip->protocol;
+
+	/* ICMP has no ports, skip */
+	if (t->proto == IPPROTO_ICMP)
+		return 0;
+
+	/* follow-up fragments don't contain ports, skip all fragments */
+	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+		return 0;
+
+	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
+
+	return 0;
+}
+
+static unsigned int
+hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_hmark_info *info = par->targinfo;
+	struct hmark_tuple t;
+
+	memset(&t, 0, sizeof(struct hmark_tuple));
+
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
+		if (hmark_ct_set_htuple(skb, &t, info) < 0)
+			return XT_CONTINUE;
+	} else {
+		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
+			return XT_CONTINUE;
+	}
+
+	skb->mark = hmark_hash(&t, info);
+	return XT_CONTINUE;
+}
+
+static int hmark_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_hmark_info *info = par->targinfo;
+
+	if (!info->hmodulus) {
+		pr_info("xt_HMARK: hash modulus can't be zero\n");
+		return -EINVAL;
+	}
+	if (info->proto_mask &&
+	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
+		pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
+		return -EINVAL;
+	}
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
+	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
+			     XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
+		pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
+		return -EINVAL;
+	}
+	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
+	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
+			     XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
+		pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct xt_target hmark_tg_reg[] __read_mostly = {
+	{
+		.name		= "HMARK",
+		.family		= NFPROTO_IPV4,
+		.target		= hmark_tg_v4,
+		.targetsize	= sizeof(struct xt_hmark_info),
+		.checkentry	= hmark_tg_check,
+		.me		= THIS_MODULE,
+	},
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+	{
+		.name		= "HMARK",
+		.family		= NFPROTO_IPV6,
+		.target		= hmark_tg_v6,
+		.targetsize	= sizeof(struct xt_hmark_info),
+		.checkentry	= hmark_tg_check,
+		.me		= THIS_MODULE,
+	},
+#endif
+};
+
+static int __init hmark_tg_init(void)
+{
+	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
+}
+
+static void __exit hmark_tg_exit(void)
+{
+	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
+}
+
+module_init(hmark_tg_init);
+module_exit(hmark_tg_exit);
-- 
cgit v1.2.3


From 0197dee7d3182bb6b6a21955860dfa14fa022d84 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 7 May 2012 10:51:45 +0000
Subject: netfilter: hashlimit: byte-based limit mode

can be used e.g. for ingress traffic policing or
to detect when a host/port consumes more bandwidth than expected.

This is done by optionally making cost to mean
"cost per 16-byte-chunk-of-data" instead of "cost per packet".

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_hashlimit.h |  10 ++-
 net/netfilter/xt_hashlimit.c           | 116 +++++++++++++++++++++++++++------
 2 files changed, 106 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index b1925b5925e9..05fe7993dd76 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -6,7 +6,11 @@
 /* timings are in milliseconds. */
 #define XT_HASHLIMIT_SCALE 10000
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
-   seconds, or one every 59 hours. */
+ * seconds, or one packet every 59 hours.
+ */
+
+/* packet length accounting is done in 16-byte steps */
+#define XT_HASHLIMIT_BYTE_SHIFT 4
 
 /* details of this structure hidden by the implementation */
 struct xt_hashlimit_htable;
@@ -17,6 +21,10 @@ enum {
 	XT_HASHLIMIT_HASH_SIP = 1 << 2,
 	XT_HASHLIMIT_HASH_SPT = 1 << 3,
 	XT_HASHLIMIT_INVERT   = 1 << 4,
+	XT_HASHLIMIT_BYTES    = 1 << 5,
+#ifdef __KERNEL__
+	XT_HASHLIMIT_MAX      = 1 << 6,
+#endif
 };
 
 struct hashlimit_cfg {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b6bbd0630e5f..d0424f9621f2 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -388,6 +388,18 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
 
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
+/* in byte mode, the lowest possible rate is one packet/second.
+ * credit_cap is used as a counter that tells us how many times we can
+ * refill the "credits available" counter when it becomes empty.
+ */
+#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ)
+#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES)
+
+static u32 xt_hashlimit_len_to_chunks(u32 len)
+{
+	return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1;
+}
+
 /* Precision saver. */
 static u32 user2credits(u32 user)
 {
@@ -399,21 +411,53 @@ static u32 user2credits(u32 user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
 }
 
-static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
+static u32 user2credits_byte(u32 user)
 {
-	dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
-	if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
-		dh->rateinfo.credit = dh->rateinfo.credit_cap;
+	u64 us = user;
+	us *= HZ * CREDITS_PER_JIFFY_BYTES;
+	return (u32) (us >> 32);
+}
+
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+{
+	unsigned long delta = now - dh->rateinfo.prev;
+	u32 cap;
+
+	if (delta == 0)
+		return;
+
 	dh->rateinfo.prev = now;
+
+	if (mode & XT_HASHLIMIT_BYTES) {
+		u32 tmp = dh->rateinfo.credit;
+		dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
+		cap = CREDITS_PER_JIFFY_BYTES * HZ;
+		if (tmp >= dh->rateinfo.credit) {/* overflow */
+			dh->rateinfo.credit = cap;
+			return;
+		}
+	} else {
+		dh->rateinfo.credit += delta * CREDITS_PER_JIFFY;
+		cap = dh->rateinfo.credit_cap;
+	}
+	if (dh->rateinfo.credit > cap)
+		dh->rateinfo.credit = cap;
 }
 
 static void rateinfo_init(struct dsthash_ent *dh,
 			  struct xt_hashlimit_htable *hinfo)
 {
 	dh->rateinfo.prev = jiffies;
-	dh->rateinfo.credit = user2credits(hinfo->cfg.avg * hinfo->cfg.burst);
-	dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-	dh->rateinfo.credit_cap = dh->rateinfo.credit;
+	if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+		dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+		dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
+		dh->rateinfo.credit_cap = hinfo->cfg.burst;
+	} else {
+		dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
+						   hinfo->cfg.burst);
+		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+		dh->rateinfo.credit_cap = dh->rateinfo.credit;
+	}
 }
 
 static inline __be32 maskl(__be32 a, unsigned int l)
@@ -519,6 +563,21 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 	return 0;
 }
 
+static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
+{
+	u64 tmp = xt_hashlimit_len_to_chunks(len);
+	tmp = tmp * dh->rateinfo.cost;
+
+	if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ))
+		tmp = CREDITS_PER_JIFFY_BYTES * HZ;
+
+	if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) {
+		dh->rateinfo.credit_cap--;
+		dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
+	}
+	return (u32) tmp;
+}
+
 static bool
 hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -527,6 +586,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
+	u32 cost;
 
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
@@ -544,12 +604,17 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	} else {
 		/* update expiration timeout */
 		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-		rateinfo_recalc(dh, now);
+		rateinfo_recalc(dh, now, hinfo->cfg.mode);
 	}
 
-	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
+	if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+		cost = hashlimit_byte_cost(skb->len, dh);
+	else
+		cost = dh->rateinfo.cost;
+
+	if (dh->rateinfo.credit >= cost) {
 		/* below the limit */
-		dh->rateinfo.credit -= dh->rateinfo.cost;
+		dh->rateinfo.credit -= cost;
 		spin_unlock(&dh->lock);
 		rcu_read_unlock_bh();
 		return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
@@ -571,14 +636,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	int ret;
 
-	/* Check for overflow. */
-	if (info->cfg.burst == 0 ||
-	    user2credits(info->cfg.avg * info->cfg.burst) <
-	    user2credits(info->cfg.avg)) {
-		pr_info("overflow, try lower: %u/%u\n",
-			info->cfg.avg, info->cfg.burst);
-		return -ERANGE;
-	}
 	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
 		return -EINVAL;
 	if (info->name[sizeof(info->name)-1] != '\0')
@@ -591,6 +648,26 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 			return -EINVAL;
 	}
 
+	if (info->cfg.mode >= XT_HASHLIMIT_MAX) {
+		pr_info("Unknown mode mask %X, kernel too old?\n",
+						info->cfg.mode);
+		return -EINVAL;
+	}
+
+	/* Check for overflow. */
+	if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
+		if (user2credits_byte(info->cfg.avg) == 0) {
+			pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+			return -EINVAL;
+		}
+	} else if (info->cfg.burst == 0 ||
+		    user2credits(info->cfg.avg * info->cfg.burst) <
+		    user2credits(info->cfg.avg)) {
+			pr_info("overflow, try lower: %u/%u\n",
+				info->cfg.avg, info->cfg.burst);
+			return -ERANGE;
+	}
+
 	mutex_lock(&hashlimit_mutex);
 	info->hinfo = htable_find_get(net, info->name, par->family);
 	if (info->hinfo == NULL) {
@@ -683,10 +760,11 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				   struct seq_file *s)
 {
 	int res;
+	const struct xt_hashlimit_htable *ht = s->private;
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
-	rateinfo_recalc(ent, jiffies);
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode);
 
 	switch (family) {
 	case NFPROTO_IPV4:
-- 
cgit v1.2.3