diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2018-07-29 11:34:56 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-02 20:27:43 -0600 |
commit | 9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a (patch) | |
tree | c2c4b7a369b6c4b2690e31d2a8c1b3c0f8023076 /drivers/infiniband | |
parent | eaeb398425089cb3c8edc81a406109db94b2705c (diff) | |
download | blackbird-op-linux-9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a.tar.gz blackbird-op-linux-9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a.zip |
RDMA/netdev: Use priv_destructor for netdev cleanup
Now that the unregister_netdev flow for IPoIB no longer relies on external
code we can now introduce the use of priv_destructor and
needs_free_netdev.
The rdma_netdev flow is switched to use the netdev common priv_destructor
instead of the special free_rdma_netdev and the IPOIB ULP adjusted:
- priv_destructor needs to switch to point to the ULP's destructor
which will then call the rdma_ndev's in the right order
- We need to be careful around the error unwind of register_netdev
as it sometimes calls priv_destructor on failure
- ULPs need to use ndo_init/uninit to ensure proper ordering
of failures around register_netdev
Switching to priv_destructor is a necessary pre-requisite to using
the rtnl new_link mechanism.
The VNIC user for rdma_netdev should also be revised, but that is left for
another patch.
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 10 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 101 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 68 |
4 files changed, 105 insertions, 76 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 06d6309b719a..13744b4631b4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5157,11 +5157,6 @@ done: return num_counters; } -static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) -{ - return mlx5_rdma_netdev_free(netdev); -} - static struct net_device* mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, u8 port_num, @@ -5171,17 +5166,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, void (*setup)(struct net_device *)) { struct net_device *netdev; - struct rdma_netdev *rn; if (type != RDMA_NETDEV_IPOIB) return ERR_PTR(-EOPNOTSUPP); netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, name, setup); - if (likely(!IS_ERR_OR_NULL(netdev))) { - rn = netdev_priv(netdev); - rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; - } return netdev; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 02ad1a60dc80..d2cb0a8500e3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -323,6 +323,7 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + void (*next_priv_destructor)(struct net_device *dev); struct napi_struct send_napi; struct napi_struct recv_napi; @@ -481,6 +482,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) kref_put(&ah->ref, ipoib_free_ah); } int ipoib_open(struct net_device *dev); +void ipoib_intf_free(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 67ab52eec3e9..73d917d57f93 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2062,6 +2062,13 @@ void ipoib_setup_common(struct net_device *dev) netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); + + /* + * unregister_netdev always frees the netdev, we use this mode + * consistently to unify all the various unregister paths, including + * those connected to rtnl_link_ops which require it. + */ + dev->needs_free_netdev = true; } static void ipoib_build_priv(struct net_device *dev) @@ -2116,9 +2123,7 @@ static struct net_device rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; - rn->free_rdma_netdev = free_netdev; rn->hca = hca; - dev->netdev_ops = &ipoib_netdev_default_pf; return dev; @@ -2173,6 +2178,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, rn = netdev_priv(dev); rn->clnt_priv = priv; + + /* + * Only the child register_netdev flows can handle priv_destructor + * being set, so we force it to NULL here and handle manually until it + * is safe to turn on. + */ + priv->next_priv_destructor = dev->priv_destructor; + dev->priv_destructor = NULL; + ipoib_build_priv(dev); return priv; @@ -2181,6 +2195,27 @@ free_priv: return NULL; } +void ipoib_intf_free(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + + dev->priv_destructor = priv->next_priv_destructor; + if (dev->priv_destructor) + dev->priv_destructor(dev); + + /* + * There are some error flows around register_netdev failing that may + * attempt to call priv_destructor twice, prevent that from happening. + */ + dev->priv_destructor = NULL; + + /* unregister/destroy is very complicated. Make bugs more obvious. */ + rn->clnt_priv = NULL; + + kfree(priv); +} + static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2341,7 +2376,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; - struct rdma_netdev *rn; + struct net_device *ndev; int result; priv = ipoib_intf_alloc(hca, port, format); @@ -2349,6 +2384,7 @@ static struct net_device *ipoib_add_port(const char *format, pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); return ERR_PTR(-ENOMEM); } + ndev = priv->dev; INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); @@ -2357,38 +2393,43 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); - result = register_netdev(priv->dev); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); - ipoib_parent_unregister_pre(priv->dev); - goto device_init_failed; + + ipoib_parent_unregister_pre(ndev); + ipoib_intf_free(ndev); + free_netdev(ndev); + + return ERR_PTR(result); } - result = -ENOMEM; - if (ipoib_cm_add_mode_attr(priv->dev)) + /* + * We cannot set priv_destructor before register_netdev because we + * need priv to be always valid during the error flow to execute + * ipoib_parent_unregister_pre(). Instead handle it manually and only + * enter priv_destructor mode once we are completely registered. + */ + ndev->priv_destructor = ipoib_intf_free; + + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) + if (device_create_file(&ndev->dev, &dev_attr_create_child)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) + if (device_create_file(&ndev->dev, &dev_attr_delete_child)) goto sysfs_failed; - return priv->dev; + return ndev; sysfs_failed: - ipoib_parent_unregister_pre(priv->dev); - unregister_netdev(priv->dev); - -device_init_failed: - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - - return ERR_PTR(result); + ipoib_parent_unregister_pre(ndev); + unregister_netdev(ndev); + return ERR_PTR(-ENOMEM); } static void ipoib_add_one(struct ib_device *device) @@ -2426,33 +2467,19 @@ static void ipoib_add_one(struct ib_device *device) static void ipoib_remove_one(struct ib_device *device, void *client_data) { - struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; + struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list = client_data; if (!dev_list) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { - struct rdma_netdev *parent_rn = netdev_priv(priv->dev); - ipoib_parent_unregister_pre(priv->dev); /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ mutex_lock(&priv->sysfs_mutex); unregister_netdev(priv->dev); mutex_unlock(&priv->sysfs_mutex); - - parent_rn->free_rdma_netdev(priv->dev); - - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - struct rdma_netdev *child_rn; - - child_rn = netdev_priv(cpriv->dev); - child_rn->free_rdma_netdev(cpriv->dev); - kfree(cpriv); - } - - kfree(priv); } kfree(dev_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 3103729a73fd..7776334cf8c5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,31 +50,53 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +/* + * NOTE: If this function fails then the priv->dev will remain valid, however + * priv can have been freed and must not be touched by caller in the error + * case. + * + * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to + * free the net_device (just as rtnl_newlink does) otherwise the net_device + * will be freed when the rtnl is unlocked. + */ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int type) { + struct net_device *ndev = priv->dev; int result; + ASSERT_RTNL(); + priv->parent = ppriv->dev; priv->pkey = pkey; priv->child_type = type; - result = register_netdevice(priv->dev); + /* We do not need to touch priv if register_netdevice fails */ + ndev->priv_destructor = ipoib_intf_free; + + result = register_netdevice(ndev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); + + /* + * register_netdevice sometimes calls priv_destructor, + * sometimes not. Make sure it was done. + */ + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); return result; } /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { - if (ipoib_cm_add_mode_attr(priv->dev)) + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + if (device_create_file(&ndev->dev, &dev_attr_parent)) goto sysfs_failed; } @@ -91,6 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; + struct net_device *ndev; struct ipoib_dev_priv *tpriv; int result; @@ -122,12 +145,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) return restart_syscall(); } - priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); - if (!priv) { - result = -ENOMEM; - goto out; - } - /* * First ensure this isn't a duplicate. We check the parent device and * then all of the legacy child interfaces to make sure the Pkey @@ -146,21 +163,23 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) } } + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); + if (!priv) { + result = -ENOMEM; + goto out; + } + ndev = priv->dev; + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + if (result && ndev->reg_state == NETREG_UNINITIALIZED) + free_netdev(ndev); + out: up_write(&ppriv->vlan_rwsem); rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); - if (result && priv) { - struct rdma_netdev *rn; - - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - } - return result; } @@ -212,14 +231,5 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) rtnl_unlock(); mutex_unlock(&ppriv->sysfs_mutex); - if (dev) { - struct rdma_netdev *rn; - - rn = netdev_priv(dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - return 0; - } - - return -ENODEV; + return (dev) ? 0 : -ENODEV; } |