summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYosef Etigin <yosefe@voltaire.com>2007-05-19 08:51:54 -0700
committerRoland Dreier <rolandd@cisco.com>2007-05-19 08:51:54 -0700
commit26bbf13ce1ca21ec69175bcc4b995cb8ffdf8593 (patch)
tree4bcad0db9ad75fe820c4f7f31155d984a03d2a7a
parent1af4c435f3ab9cdf72ce86c35a455c8bef1d6536 (diff)
downloadblackbird-op-linux-26bbf13ce1ca21ec69175bcc4b995cb8ffdf8593.tar.gz
blackbird-op-linux-26bbf13ce1ca21ec69175bcc4b995cb8ffdf8593.zip
IPoIB: Handle P_Key table reordering
SM reconfiguration or failover possibly causes a shuffling of the values in the P_Key table. Right now, IPoIB only queries for the P_Key index once when it creates the device QP, and hence there are problems if the index of a P_Key value changes. Fix this by using the PKEY_CHANGE event to trigger a recheck of the P_Key index. Signed-off-by: Yosef Etigin <yosefe@voltaire.com> Acked-by: Michael S. Tsirkin <mst@dev.mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c85
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c38
4 files changed, 92 insertions, 45 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 87310eeb6df0..93d4a9a1e1dd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -201,15 +201,17 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_task;
+ struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
+ struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
u16 pkey;
+ u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_cq *cq;
@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 68d72c6f7ffb..e3b0937011d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+ ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ return -1;
+ }
+ set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work(&priv->pkey_task);
+ cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
return pending;
}
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -629,7 +636,8 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
- flush_workqueue(ipoib_workqueue);
+ if (flush)
+ flush_workqueue(ipoib_workqueue);
begin = jiffies;
@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0;
}
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
- struct ipoib_dev_priv *cpriv, *priv =
- container_of(work, struct ipoib_dev_priv, flush_task);
+ struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
+ u16 new_index;
+
+ mutex_lock(&priv->vlan_mutex);
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+ /*
+ * Flush any child interfaces too -- they might be up even if
+ * the parent is down.
+ */
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ __ipoib_ib_dev_flush(cpriv, pkey_event);
+
+ mutex_unlock(&priv->vlan_mutex);
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
return;
}
+ if (pkey_event) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ ipoib_ib_dev_down(dev, 0);
+ ipoib_pkey_dev_delay_open(dev);
+ return;
+ }
+ set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+ /* restart QP only if P_Key index is changed */
+ if (new_index == priv->pkey_index) {
+ ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
+ return;
+ }
+ priv->pkey_index = new_index;
+ }
+
ipoib_dbg(priv, "flushing\n");
ipoib_ib_dev_down(dev, 0);
+ if (pkey_event) {
+ ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_open(dev);
+ }
+
/*
* The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up
@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
+}
- mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, flush_task);
- /* Flush any child interfaces too */
- list_for_each_entry(cpriv, &priv->child_intfs, list)
- ipoib_ib_dev_flush(&cpriv->flush_task);
+ ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 0);
+}
- mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+ ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 1);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_task.work);
+ container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
}
@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
return 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0a428f2b05c7..894b1dcdf3eb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL;
if (ipoib_ib_dev_up(dev)) {
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -EINVAL;
}
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
+ INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+ INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5c3c6a43a52b..791252621b26 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <rdma/ib_cache.h>
-
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
if (!qp_attr)
goto out;
- if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- u16 pkey_index;
struct ib_qp_attr qp_attr;
int attr_mask;
- /*
- * Search through the port P_Key table for the requested pkey value.
- * The port has to be assigned to the respective IB partition in
- * advance.
- */
- ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
- if (ret) {
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- return ret;
- }
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+ return -1;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
qp_attr.port_num = priv->port;
- qp_attr.pkey_index = pkey_index;
+ qp_attr.pkey_index = priv->pkey_index;
attr_mask =
IB_QP_QKEY |
IB_QP_PORT |
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
- if ((record->event == IB_EVENT_PORT_ERR ||
- record->event == IB_EVENT_PKEY_CHANGE ||
- record->event == IB_EVENT_PORT_ACTIVE ||
- record->event == IB_EVENT_LID_CHANGE ||
- record->event == IB_EVENT_SM_CHANGE ||
- record->event == IB_EVENT_CLIENT_REREGISTER) &&
- record->element.port_num == priv->port) {
+ if (record->element.port_num != priv->port)
+ return;
+
+ if (record->event == IB_EVENT_PORT_ERR ||
+ record->event == IB_EVENT_PORT_ACTIVE ||
+ record->event == IB_EVENT_LID_CHANGE ||
+ record->event == IB_EVENT_SM_CHANGE ||
+ record->event == IB_EVENT_CLIENT_REREGISTER) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
+ } else if (record->event == IB_EVENT_PKEY_CHANGE) {
+ ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
+ queue_work(ipoib_workqueue, &priv->pkey_event_task);
}
}
OpenPOWER on IntegriCloud