From 723e98b79c5f2dd97ce559506362844b1a086f80 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 15 May 2007 22:46:18 -0700 Subject: [NET]: lockdep classes in register_netdevice After initializing dev->_xmit_lock register_netdevice() sets lockdep class according to dev->type. Idea of this patch - by David Miller. Reported & tested by: "Yuriy N. Shkandybin" Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 8301e2ac747f..f2b61111e26d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include #include #include +#include /* * The list of packet types we will receive (as opposed to discard) @@ -217,6 +218,73 @@ extern void netdev_unregister_sysfs(struct net_device *); #define netdev_unregister_sysfs(dev) do { } while(0) #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * according to dev->type + */ +static const unsigned short netdev_lock_type[] = + {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, + ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, + ARPHRD_NONE}; + +static const char *netdev_lock_name[] = + {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", + "_xmit_NONE"}; + +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; + +static inline unsigned short netdev_lock_pos(unsigned short dev_type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) + if (netdev_lock_type[i] == dev_type) + return i; + /* the last key is used by default */ + return ARRAY_SIZE(netdev_lock_type) - 1; +} + +static inline void netdev_set_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ + int i; + + i = netdev_lock_pos(dev_type); + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], + netdev_lock_name[i]); +} +#else +static inline void netdev_set_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +#endif /******************************************************************************* @@ -3001,6 +3069,7 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); + netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); dev->xmit_lock_owner = -1; spin_lock_init(&dev->ingress_lock); -- cgit v1.2.1 From 689d79469b64662440a93ffbd1cbf994510bcbf6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 May 2007 00:30:09 -0700 Subject: [NET]: Fix BMSR_100{HALF,FULL}2 defines in linux/mii.h Noticed by Matvejchikov Ilya. Signed-off-by: David S. Miller --- include/linux/mii.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mii.h b/include/linux/mii.h index beddc6d3b0f6..151b7e0182c7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -56,8 +56,8 @@ #define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ #define BMSR_RESV 0x00c0 /* Unused... */ #define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ -#define BMSR_100FULL2 0x0200 /* Can do 100BASE-T2 HDX */ -#define BMSR_100HALF2 0x0400 /* Can do 100BASE-T2 FDX */ +#define BMSR_100HALF2 0x0200 /* Can do 100BASE-T2 HDX */ +#define BMSR_100FULL2 0x0400 /* Can do 100BASE-T2 FDX */ #define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ #define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ #define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ -- cgit v1.2.1 From 4ce61d1c7a8ef4c1337fa983a3036d4010e3c19e Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Wed, 16 May 2007 23:50:16 -0700 Subject: [BLUETOOTH]: Fix locking in hci_sock_dev_event(). We presently use lock_sock() to acquire a lock on a socket in hci_sock_dev_event(), but this goes BUG because lock_sock() can sleep and we're already holding a read-write spinlock at that point. So, we must use the non-sleeping BH version, bh_lock_sock(). However, hci_sock_dev_event() is called from user context and hence using simply bh_lock_sock() will deadlock against a concurrent softirq that tries to acquire a lock on the same socket. Hence, disabling BH's before acquiring the socket lock and enable them afterwards, is the proper solution to fix socket locking in hci_sock_dev_event(). Signed-off-by: Satyam Sharma Signed-off-by: Marcel Holtmann Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- net/bluetooth/hci_sock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index bfc9a35bad33..1dae3dfc66a9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -665,7 +665,8 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, node, &hci_sk_list.head) { - lock_sock(sk); + local_bh_disable(); + bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; @@ -674,7 +675,8 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, hci_dev_put(hdev); } - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable(); } read_unlock(&hci_sk_list.lock); } -- cgit v1.2.1 From a02ba041664171563e6418ccdf3b363d32d6a43b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 17 May 2007 00:04:18 -0700 Subject: [TCP] slow start: Make comments and code logic clearer. Add more comments to describe our version of tcp_slow_start(). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 86b26539e54b..1260e52ad772 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,30 +276,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) /* - * Slow start (exponential increase) with - * RFC3742 Limited Slow Start (fast linear increase) support. + * Slow start is used when congestion window is less than slow start + * threshold. This version implements the basic RFC2581 version + * and optionally supports: + * RFC3742 Limited Slow Start - growth limited to max_ssthresh + * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged */ void tcp_slow_start(struct tcp_sock *tp) { - int cnt = 0; - - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; - } + int cnt; /* increase in packets */ + + /* RFC3465: ABC Slow start + * Increase only after a full MSS of bytes is acked + * + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) + return; - if (sysctl_tcp_max_ssthresh > 0 && - tp->snd_cwnd > sysctl_tcp_max_ssthresh) - cnt += sysctl_tcp_max_ssthresh>>1; + if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) + cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else - cnt += tp->snd_cwnd; + cnt = tp->snd_cwnd; /* exponential increase */ - /* RFC3465: We MAY increase by 2 if discovered delayed ack */ + /* RFC3465: ABC + * We MAY increase by 2 if discovered delayed ack + */ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) cnt <<= 1; tp->bytes_acked = 0; -- cgit v1.2.1 From 2ff011efa4cc591ed563241bae3d60bc0eaac210 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 May 2007 00:07:47 -0700 Subject: [TCP]: TCP_CONG_YEAH requires TCP_CONG_VEGAS These two congestion control modules share code. Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c68196cc56ab..719c1040bd0a 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -577,6 +577,7 @@ config TCP_CONG_VENO config TCP_CONG_YEAH tristate "YeAH TCP" depends on EXPERIMENTAL + select TCP_CONG_VEGAS default n ---help--- YeAH-TCP is a sender-side high-speed enabled TCP congestion control -- cgit v1.2.1 From d739437207064cdcea8f9c81442284106cbcb67f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 17 May 2007 15:02:21 -0700 Subject: [IPV4]: Correct rp_filter help text. As mentioned in http://bugzilla.kernel.org/show_bug.cgi?id=5015 The helptext implies that this is on by default. This may be true on some distros (Fedora/RHEL have it enabled in /etc/sysctl.conf), but the kernel defaults to it off. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 719c1040bd0a..010fbb2d45e9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -43,11 +43,11 @@ config IP_ADVANCED_ROUTER asymmetric routing (packets from you to a host take a different path than packets from that host to you) or if you operate a non-routing host which has several IP addresses on different interfaces. To turn - rp_filter off use: + rp_filter on use: - echo 0 > /proc/sys/net/ipv4/conf//rp_filter + echo 1 > /proc/sys/net/ipv4/conf//rp_filter or - echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter + echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter If unsure, say N here. -- cgit v1.2.1 From f6c5d736af6bc71cebc017e4001ec5efff1ee116 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 May 2007 02:07:50 -0700 Subject: [IPV4]: Remove IPVS icmp hack from route.c for now. Revert: 2d771cd86d4c3af26f34a7bcdc1b87696824cad9 This is dangerous if enabled and a better solution to the problem is being worked on. Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb76e3c725a0..df9fe4f2e8cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = ip_dev_find(oldflp->fl4_src); - if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind)) + if (dev_out == NULL) goto out; /* I removed check for oif == dev_out->oif here. @@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) of another iface. --ANK */ - if (dev_out && oldflp->oif == 0 + if (oldflp->oif == 0 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface -- cgit v1.2.1