From ac3d9dd034e565df2c034ab2ca71f0a9f69153c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:38 -0700 Subject: netpoll: make ndo_poll_controller() optional As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. It seems that all networking drivers that do use NAPI for their TX completions, should not provide a ndo_poll_controller(). NAPI drivers have netpoll support already handled in core networking stack, since netpoll_poll_dev() uses poll_napi(dev) to iterate through registered NAPI contexts for a device. This patch allows netpoll_poll_dev() to process NAPI contexts even for drivers not providing ndo_poll_controller(), allowing for following patches in NAPI drivers. Also we export netpoll_poll_dev() so that it can be called by bonding/team drivers in following patches. Reported-by: Song Liu Signed-off-by: Eric Dumazet Tested-by: Song Liu Signed-off-by: David S. Miller --- net/core/netpoll.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 57557a6a950c..3219a2932463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -187,16 +187,16 @@ static void poll_napi(struct net_device *dev) } } -static void netpoll_poll_dev(struct net_device *dev) +void netpoll_poll_dev(struct net_device *dev) { - const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ - if (down_trylock(&ni->dev_lock)) + if (!ni || down_trylock(&ni->dev_lock)) return; if (!netif_running(dev)) { @@ -205,13 +205,8 @@ static void netpoll_poll_dev(struct net_device *dev) } ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) { - up(&ni->dev_lock); - return; - } - - /* Process pending work on NIC */ - ops->ndo_poll_controller(dev); + if (ops->ndo_poll_controller) + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -219,6 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { @@ -613,8 +609,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); - if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || - !ndev->netdev_ops->ndo_poll_controller) { + if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; -- cgit v1.2.3 From c24498c6827b71f80fecc9fb1b70a792053d41a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:51 -0700 Subject: netpoll: do not test NAPI_STATE_SCHED in poll_one_napi() Since we do no longer require NAPI drivers to provide an ndo_poll_controller(), napi_schedule() has not been done before poll_one_napi() invocation. So testing NAPI_STATE_SCHED is likely to cause early returns. While we are at it, remove outdated comment. Note to future bisections : This change might surface prior bugs in drivers. See commit 73f21c653f93 ("bnxt_en: Fix TX timeout during netpoll.") for one occurrence. Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Signed-off-by: Eric Dumazet Tested-by: Song Liu Cc: Michael Chan Signed-off-by: David S. Miller --- net/core/netpoll.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3219a2932463..3ae899805f8b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) } } -/* - * Check whether delayed processing was scheduled for our NIC. If so, - * we attempt to grab the poll lock and use ->poll() to pump the card. - * If this fails, either we've recursed in ->poll() or it's already - * running on another CPU. - * - * Note: we don't mask interrupts with this lock because we're using - * trylock here and interrupts are already disabled in the softirq - * case. Further, we test the poll_owner to avoid recursion on UP - * systems where the lock doesn't exist. - */ static void poll_one_napi(struct napi_struct *napi) { - int work = 0; - - /* net_rx_action's ->poll() invocations and our's are - * synchronized by this test which is only made while - * holding the napi->poll_lock. - */ - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return; + int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need -- cgit v1.2.3 From 6fe9487892b32cb1c8b8b0d552ed7222a527fe30 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Sep 2018 16:26:08 -0400 Subject: bond: take rcu lock in netpoll_send_skb_on_dev The bonding driver lacks the rcu lock when it calls down into netdev_lower_get_next_private_rcu from bond_poll_controller, which results in a trace like: WARNING: CPU: 2 PID: 179 at net/core/dev.c:6567 netdev_lower_get_next_private_rcu+0x34/0x40 CPU: 2 PID: 179 Comm: kworker/u16:15 Not tainted 4.19.0-rc5-backup+ #1 Workqueue: bond0 bond_mii_monitor RIP: 0010:netdev_lower_get_next_private_rcu+0x34/0x40 Code: 48 89 fb e8 fe 29 63 ff 85 c0 74 1e 48 8b 45 00 48 81 c3 c0 00 00 00 48 8b 00 48 39 d8 74 0f 48 89 45 00 48 8b 40 f8 5b 5d c3 <0f> 0b eb de 31 c0 eb f5 0f 1f 40 00 0f 1f 44 00 00 48 8> RSP: 0018:ffffc9000087fa68 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880429614560 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 00000000ffffffff RDI: ffffffffa184ada0 RBP: ffffc9000087fa80 R08: 0000000000000001 R09: 0000000000000000 R10: ffffc9000087f9f0 R11: ffff880429798040 R12: ffff8804289d5980 R13: ffffffffa1511f60 R14: 00000000000000c8 R15: 00000000ffffffff FS: 0000000000000000(0000) GS:ffff88042f880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4b78fce180 CR3: 000000018180f006 CR4: 00000000001606e0 Call Trace: bond_poll_controller+0x52/0x170 netpoll_poll_dev+0x79/0x290 netpoll_send_skb_on_dev+0x158/0x2c0 netpoll_send_udp+0x2d5/0x430 write_ext_msg+0x1e0/0x210 console_unlock+0x3c4/0x630 vprintk_emit+0xfa/0x2f0 printk+0x52/0x6e ? __netdev_printk+0x12b/0x220 netdev_info+0x64/0x80 ? bond_3ad_set_carrier+0xe9/0x180 bond_select_active_slave+0x1fc/0x310 bond_mii_monitor+0x709/0x9b0 process_one_work+0x221/0x5e0 worker_thread+0x4f/0x3b0 kthread+0x100/0x140 ? process_one_work+0x5e0/0x5e0 ? kthread_delayed_work_timer_fn+0x90/0x90 ret_from_fork+0x24/0x30 We're also doing rcu dereferences a layer up in netpoll_send_skb_on_dev before we call down into netpoll_poll_dev, so just take the lock there. Suggested-by: Cong Wang Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/netpoll.c') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3ae899805f8b..de1d1ba92f2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -312,6 +312,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; + rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -356,6 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); -- cgit v1.2.3